set tensor core opt options in Renderer (#4896)

Author: qazal
Date: 2024-06-10 02:12:41 +08:00
Committed by: GitHub
Parent: f42183ba28
Commit: 05d7ab774f
2 changed files with 8 additions and 3 deletions

@@ -384,7 +384,7 @@ class Kernel:
return True
return False
- def apply_tensor_cores(self, use_tensor_cores=1, extra_opts:Optional[List[Opt]]=None, axis:int=0, tc_opt:int=getenv("TC_OPT")) -> bool:
+ def apply_tensor_cores(self, use_tensor_cores=1, extra_opts:Optional[List[Opt]]=None, axis:int=0, tc_opt:Optional[int]=None) -> bool:
""" Attempts to apply a tensor core optimization to the kernel. If one exists and applies properly, return true, otherwise return false.
Tensor cores are optimized instructions that matrix multiply-accumulate across a wave of threads: D(M, N) = A(M, K) * B(K, N) + C(M, N).
@@ -399,6 +399,7 @@ class Kernel:
1: allows kernels with multiple reduce axes and also multiplication of UnaryOps.CAST'd buffers
2: allows kernels with M, N, K axes that are not multiples of the tensor core dimensions by applying padding those axes as needed
"""
+ if tc_opt is None: tc_opt = self.opts.tc_opt
if not self.opts.tensor_cores and use_tensor_cores != 2: return False
try: # check TC first and apply hand-coded opts if successful
self.apply_opt(Opt(OptOps.TC, axis, tc_opt))
@@ -431,7 +432,7 @@ class Kernel:
if opt.op is OptOps.TC:
check(len(self.applied_opts) == 0, "tensor core opts must be first") # TODO: things like PADTO might be fine
check(opt.axis is not None and opt.amt is not None, "tensor core opts must have an axis and amt")
- check((use_tensor_cores:=getenv("TC", 1)) == 2 or len(self.opts.tensor_cores) > 0, "must have tensor cores or TC=2")
+ check((use_tensor_cores:=self.opts.tc) == 2 or len(self.opts.tensor_cores) > 0, "must have tensor cores or TC=2")
check(self._apply_tc_opt(use_tensor_cores, cast(int, opt.axis), cast(int, opt.amt)), "no tensor core available")
self.applied_opts.append(opt)
return
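With these Kernel changes, tc_opt defaults to None and is resolved from self.opts.tc_opt when the caller does not pass it, and the tensor core gate reads self.opts.tc instead of calling getenv at the call site. One concrete difference: the old signature default getenv("TC_OPT") was evaluated exactly once, when the function was defined at import time, so later changes to the environment were ignored. A minimal standalone sketch of that behavior follows; the local getenv here is a simplified stand-in for illustration, not tinygrad.helpers.getenv.

import os

# Stand-in getenv for the sketch only.
def getenv(key, default=0): return type(default)(os.getenv(key, default))

def apply_old(tc_opt:int=getenv("TC_OPT")):  # default evaluated now, at definition time
  return tc_opt

def apply_new(tc_opt=None):                  # default resolved on each call
  if tc_opt is None: tc_opt = getenv("TC_OPT")
  return tc_opt

os.environ["TC_OPT"] = "2"
print(apply_old())  # still the value captured when apply_old was defined (0 if TC_OPT was unset then)
print(apply_new())  # 2: picks up the environment at call time

The Renderer half of the change, below, is where self.opts.tc and self.opts.tc_opt now come from.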

@@ -1,7 +1,7 @@
from typing import Optional, List, Tuple, Dict
import functools
from dataclasses import dataclass
- from tinygrad.helpers import to_function_name
+ from tinygrad.helpers import getenv, to_function_name
from tinygrad.codegen.uops import UOpGraph
from tinygrad.shape.symbolic import sym_infer, sint, Variable
from tinygrad.dtype import DType
@@ -57,5 +57,9 @@ class Renderer:
local_max: Optional[List[int]] = None
shared_max: int = 32768
tensor_cores: List[TensorCore] = []
+ @functools.cached_property
+ def tc_opt(self): return getenv("TC_OPT")
+ @functools.cached_property
+ def tc(self): return getenv("TC", 1)
def render(self, name:str, uops:UOpGraph) -> str: raise NotImplementedError("needs a renderer")
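Because tc and tc_opt are functools.cached_property attributes, each environment variable is read on the first access for a given Renderer instance and the result is memoized on that instance; Kernel code then consults self.opts.tc and self.opts.tc_opt. A small sketch of that caching behavior (DemoRenderer is an illustrative stand-in, not the real Renderer):

import functools, os

class DemoRenderer:
  @functools.cached_property
  def tc(self):
    print("reading TC from the environment")  # runs only on the first access
    return int(os.getenv("TC", "1"))

r = DemoRenderer()
print(r.tc)  # triggers the read; 1 unless TC is set
print(r.tc)  # served from the per-instance cache, no second read

A consequence of the per-instance cache is that changing TC or TC_OPT in the environment after the property has been accessed does not affect that Renderer instance.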