set tensor core opt options in Renderer (#4896)

Author: qazal
Date: 2024-06-10 02:12:41 +08:00
Committed by: GitHub
Parent: f42183ba28
Commit: 05d7ab774f
2 changed files with 8 additions and 3 deletions

@@ -384,7 +384,7 @@ class Kernel:
return True
return False
- def apply_tensor_cores(self, use_tensor_cores=1, extra_opts:Optional[List[Opt]]=None, axis:int=0, tc_opt:int=getenv("TC_OPT")) -> bool:
+ def apply_tensor_cores(self, use_tensor_cores=1, extra_opts:Optional[List[Opt]]=None, axis:int=0, tc_opt:Optional[int]=None) -> bool:
""" Attempts to apply a tensor core optimization to the kernel. If one exists and applies properly, return true, otherwise return false.
Tensor cores are optimized instructions that matrix multiply-accumulate across a wave of threads: D(M, N) = A(M, K) * B(K, N) + C(M, N).
@@ -399,6 +399,7 @@ class Kernel:
1: allows kernels with multiple reduce axes and also multiplication of UnaryOps.CAST'd buffers
2: allows kernels with M, N, K axes that are not multiples of the tensor core dimensions by applying padding those axes as needed
"""
+ if tc_opt is None: tc_opt = self.opts.tc_opt
if not self.opts.tensor_cores and use_tensor_cores != 2: return False
try: # check TC first and apply hand-coded opts if successful
self.apply_opt(Opt(OptOps.TC, axis, tc_opt))
@@ -431,7 +432,7 @@ class Kernel:
if opt.op is OptOps.TC:
check(len(self.applied_opts) == 0, "tensor core opts must be first") # TODO: things like PADTO might be fine
check(opt.axis is not None and opt.amt is not None, "tensor core opts must have an axis and amt")
- check((use_tensor_cores:=getenv("TC", 1)) == 2 or len(self.opts.tensor_cores) > 0, "must have tensor cores or TC=2")
+ check((use_tensor_cores:=self.opts.tc) == 2 or len(self.opts.tensor_cores) > 0, "must have tensor cores or TC=2")
check(self._apply_tc_opt(use_tensor_cores, cast(int, opt.axis), cast(int, opt.amt)), "no tensor core available")
self.applied_opts.append(opt)
return
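With these Kernel changes, tc_opt defaults to None and is resolved from self.opts.tc_opt when the caller does not pass it, and the tensor core gate reads self.opts.tc instead of calling getenv at the call site. One concrete difference: the old signature default getenv("TC_OPT") was evaluated exactly once, when the function was defined at import time, so later changes to the environment were ignored. A minimal standalone sketch of that behavior follows; the local getenv here is a simplified stand-in for illustration, not tinygrad.helpers.getenv.

import os

# Stand-in getenv for the sketch only.
def getenv(key, default=0): return type(default)(os.getenv(key, default))

def apply_old(tc_opt:int=getenv("TC_OPT")):  # default evaluated now, at definition time
  return tc_opt

def apply_new(tc_opt=None):                  # default resolved on each call
  if tc_opt is None: tc_opt = getenv("TC_OPT")
  return tc_opt

os.environ["TC_OPT"] = "2"
print(apply_old())  # still the value captured when apply_old was defined (0 if TC_OPT was unset then)
print(apply_new())  # 2: picks up the environment at call time

The Renderer half of the change, below, is where self.opts.tc and self.opts.tc_opt now come from.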

@@ -1,7 +1,7 @@
from typing import Optional, List, Tuple, Dict
import functools
from dataclasses import dataclass
- from tinygrad.helpers import to_function_name
+ from tinygrad.helpers import getenv, to_function_name
from tinygrad.codegen.uops import UOpGraph
from tinygrad.shape.symbolic import sym_infer, sint, Variable
from tinygrad.dtype import DType
@@ -57,5 +57,9 @@ class Renderer:
local_max: Optional[List[int]] = None
shared_max: int = 32768
tensor_cores: List[TensorCore] = []
+ @functools.cached_property
+ def tc_opt(self): return getenv("TC_OPT")
+ @functools.cached_property
+ def tc(self): return getenv("TC", 1)
def render(self, name:str, uops:UOpGraph) -> str: raise NotImplementedError("needs a renderer")
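Because tc and tc_opt are functools.cached_property attributes, each environment variable is read on the first access for a given Renderer instance and the result is memoized on that instance; Kernel code then consults self.opts.tc and self.opts.tc_opt. A small sketch of that caching behavior (DemoRenderer is an illustrative stand-in, not the real Renderer):

import functools, os

class DemoRenderer:
  @functools.cached_property
  def tc(self):
    print("reading TC from the environment")  # runs only on the first access
    return int(os.getenv("TC", "1"))

r = DemoRenderer()
print(r.tc)  # triggers the read; 1 unless TC is set
print(r.tc)  # served from the per-instance cache, no second read

A consequence of the per-instance cache is that changing TC or TC_OPT in the environment after the property has been accessed does not affect that Renderer instance.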