linearizer ast as a tuple of lazyops (#3689)

* multi store op linearizer

* currently only one output per kernel is supported (enforced by an assert)

* named opts (opts is now keyword-only; callers pass `opts=`)
This commit is contained in:
qazal
2024-03-12 00:39:04 +02:00
committed by GitHub
parent d0bcc9a66b
commit aec4c4f01b
6 changed files with 14 additions and 12 deletions

View File

@@ -240,7 +240,7 @@ result = Tensor(2.0).realize() + Tensor(3.0).realize()
from tinygrad.codegen.linearizer import Linearizer
from tinygrad.realize import create_schedule
sched = create_schedule([result.lazydata])
linearizer = Linearizer(sched[-1].ast, ClangCompiler.linearizer_opts)
linearizer = Linearizer(sched[-1].ast, opts=ClangCompiler.linearizer_opts)
linearizer.linearize()
# print the uops

View File

@@ -43,18 +43,18 @@ if __name__ == "__main__":
lins:List[Linearizer] = []
# always try hand coded opt
lin = Linearizer(si.ast, device.compiler.linearizer_opts)
lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
lin.hand_coded_optimizations()
lins.append(lin)
# maybe try tensor cores
lin = Linearizer(si.ast, device.compiler.linearizer_opts)
lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
if lin.apply_tensor_cores():
lins.append(lin)
# try a beam search
if beam:=getenv("BEAM"):
lin = Linearizer(si.ast, device.compiler.linearizer_opts)
lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
lin = beam_search(lin, rawbufs, beam, bool(getenv("BEAM_ESTIMATE", 1)))
lins.append(lin)

View File

@@ -9,7 +9,7 @@ inf, nan = float('inf'), float('nan')
# kernel unpacker
from tinygrad.codegen.linearizer import Linearizer
# turns the string form of an ast back into a LazyOp by evaluating it as a Python expression
# NOTE(review): eval() executes whatever code the string contains, so ast_str must come from a trusted source
def ast_str_to_ast(ast_str:str) -> LazyOp: return eval(ast_str)
# parses ast_str with ast_str_to_ast and wraps the result in a Linearizer, forwarding opts unchanged
# NOTE(review): the two definitions below differ only in passing opts positionally vs. by keyword
def ast_str_to_lin(ast_str:str, opts=None): return Linearizer(ast_str_to_ast(ast_str), opts)
def ast_str_to_lin(ast_str:str, opts=None): return Linearizer(ast_str_to_ast(ast_str), opts=opts)
# load worlds, a dataset of about 12k kernels
import gzip

View File

@@ -204,7 +204,7 @@ class TestLinearizer(unittest.TestCase):
ConstBuffer(42, dtypes.float, ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),))))
ast = LazyOp(BufferOps.STORE, (ast,),
MemBuffer(0, dtypes.float, ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),))))
lin = Linearizer(ast=ast) # this is a dummy ast
lin = Linearizer(ast) # this is a dummy ast
lin.uops = UOpGraph()
return lin.uops.add(uop, dtype, vin, arg, cachable=False)

View File

@@ -86,11 +86,13 @@ class LinearizerOptions(NamedTuple):
local_max: Optional[List[int]] = None
class Kernel:
def __init__(self, ast:LazyOp, opts:Optional[LinearizerOptions]=None):
def __init__(self, *ast:LazyOp, opts:Optional[LinearizerOptions]=None):
self.opts = opts or (device.compiler.linearizer_opts if isinstance(device:=Device[Device.DEFAULT], Compiled) and device.compiler is not None else
LinearizerOptions(Device.DEFAULT))
self.ast = ast
assert ast.op == BufferOps.STORE, f"kernels must have a store as the output, got {ast.op}"
assert all(op.op is BufferOps.STORE for op in ast), f"kernels must have stores as the output, got {ast}"
assert len(set(op.arg.st.size for op in ast)) == 1, f"all outbufs should have the same size, got {[op.arg.st for op in ast]}"
assert len(ast) == 1, "max one output per kernel"
self.ast = ast[0]
# fetch lazyop info
self.info: FlopCounter = get_lazyop_info(self.ast)

View File

@@ -248,16 +248,16 @@ class Compiled:
from tinygrad.features.graph import print_tree
print_tree(ast)
from tinygrad.codegen.linearizer import Linearizer
k = Linearizer(ast, self.compiler.linearizer_opts)
k = Linearizer(ast, opts=self.compiler.linearizer_opts)
k.required_optimizations()
if not NOOPT:
if not (used_tensor_cores:=k.apply_tensor_cores(getenv("TC", 1))): k.hand_coded_optimizations()
if BEAM >= 1:
lins = [(("tc" if used_tensor_cores else "hc"), k)]
if used_tensor_cores:
lins.append(("hc", Linearizer(ast, self.compiler.linearizer_opts)))
lins.append(("hc", Linearizer(ast, opts=self.compiler.linearizer_opts)))
lins[-1][1].hand_coded_optimizations()
kb = Linearizer(ast, self.compiler.linearizer_opts)
kb = Linearizer(ast, opts=self.compiler.linearizer_opts)
kb.required_optimizations()
from tinygrad.features.search import beam_search, time_linearizer, bufs_from_lin
test_rawbuffers = bufs_from_lin(kb) # allocate scratch buffers for optimization