diff --git a/tinygrad/codegen/kernel.py b/tinygrad/codegen/kernel.py index 25fd540921..a12ecf1348 100644 --- a/tinygrad/codegen/kernel.py +++ b/tinygrad/codegen/kernel.py @@ -384,7 +384,7 @@ class Kernel: return True return False - def apply_tensor_cores(self, use_tensor_cores=1, extra_opts:Optional[List[Opt]]=None, axis:int=0, tc_opt:int=getenv("TC_OPT")) -> bool: + def apply_tensor_cores(self, use_tensor_cores=1, extra_opts:Optional[List[Opt]]=None, axis:int=0, tc_opt:Optional[int]=None) -> bool: """ Attempts to apply a tensor core optimization to the kernel. If one exists and applies properly, return true, otherwise return false. Tensor cores are optimized instructions that matrix multiply-accumulate across a wave of threads: D(M, N) = A(M, K) * B(K, N) + C(M, N). @@ -399,6 +399,7 @@ class Kernel: 1: allows kernels with multiple reduce axes and also multiplication of UnaryOps.CAST'd buffers 2: allows kernels with M, N, K axes that are not multiples of the tensor core dimensions by applying padding those axes as needed """ + if tc_opt is None: tc_opt = self.opts.tc_opt if not self.opts.tensor_cores and use_tensor_cores != 2: return False try: # check TC first and apply hand-coded opts if successful self.apply_opt(Opt(OptOps.TC, axis, tc_opt)) @@ -431,7 +432,7 @@ class Kernel: if opt.op is OptOps.TC: check(len(self.applied_opts) == 0, "tensor core opts must be first") # TODO: things like PADTO might be fine check(opt.axis is not None and opt.amt is not None, "tensor core opts must have an axis and amt") - check((use_tensor_cores:=getenv("TC", 1)) == 2 or len(self.opts.tensor_cores) > 0, "must have tensor cores or TC=2") + check((use_tensor_cores:=self.opts.tc) == 2 or len(self.opts.tensor_cores) > 0, "must have tensor cores or TC=2") check(self._apply_tc_opt(use_tensor_cores, cast(int, opt.axis), cast(int, opt.amt)), "no tensor core available") self.applied_opts.append(opt) return diff --git a/tinygrad/renderer/__init__.py b/tinygrad/renderer/__init__.py index 897039a9c6..31b688cd98 100644 --- a/tinygrad/renderer/__init__.py +++ b/tinygrad/renderer/__init__.py @@ -1,7 +1,7 @@ from typing import Optional, List, Tuple, Dict import functools from dataclasses import dataclass -from tinygrad.helpers import to_function_name +from tinygrad.helpers import getenv, to_function_name from tinygrad.codegen.uops import UOpGraph from tinygrad.shape.symbolic import sym_infer, sint, Variable from tinygrad.dtype import DType @@ -57,5 +57,9 @@ class Renderer: local_max: Optional[List[int]] = None shared_max: int = 32768 tensor_cores: List[TensorCore] = [] + @functools.cached_property + def tc_opt(self): return getenv("TC_OPT") + @functools.cached_property + def tc(self): return getenv("TC", 1) def render(self, name:str, uops:UOpGraph) -> str: raise NotImplementedError("needs a renderer")