mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-01-09 15:08:02 -05:00
linearizer ast as a tuple of lazyops (#3689)
* multi store op linearizer
* currently we do only one output per kernel
* named opts
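The heart of the diff is the Kernel constructor change below: the single ast argument becomes a *ast varargs tuple of LazyOps, which makes opts keyword-only in practice and forces every call site onto opts=. A minimal before/after sketch of why (toy stand-ins, not the real class):

class KernelBefore:
  def __init__(self, ast, opts=None): self.ast, self.opts = ast, opts

class KernelAfter:
  def __init__(self, *ast, opts=None): self.ast, self.opts = ast[0], opts

KernelBefore("AST", "OPTS")      # fine before: opts == "OPTS"
KernelAfter("AST", opts="OPTS")  # the new spelling: opts == "OPTS"
KernelAfter("AST", "OPTS")       # trap: "OPTS" is packed into *ast and opts stays None
                                 # (the real class's new asserts would reject it)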
@@ -240,7 +240,7 @@ result = Tensor(2.0).realize() + Tensor(3.0).realize()
 from tinygrad.codegen.linearizer import Linearizer
 from tinygrad.realize import create_schedule
 sched = create_schedule([result.lazydata])
-linearizer = Linearizer(sched[-1].ast, ClangCompiler.linearizer_opts)
+linearizer = Linearizer(sched[-1].ast, opts=ClangCompiler.linearizer_opts)
 linearizer.linearize()
 
 # print the uops
@@ -43,18 +43,18 @@ if __name__ == "__main__":
     lins:List[Linearizer] = []
 
     # always try hand coded opt
-    lin = Linearizer(si.ast, device.compiler.linearizer_opts)
+    lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
     lin.hand_coded_optimizations()
     lins.append(lin)
 
     # maybe try tensor cores
-    lin = Linearizer(si.ast, device.compiler.linearizer_opts)
+    lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
     if lin.apply_tensor_cores():
       lins.append(lin)
 
     # try a beam search
     if beam:=getenv("BEAM"):
-      lin = Linearizer(si.ast, device.compiler.linearizer_opts)
+      lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
       lin = beam_search(lin, rawbufs, beam, bool(getenv("BEAM_ESTIMATE", 1)))
       lins.append(lin)
 
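This hunk only renames the opts argument, but it shows the candidate-collection pattern: a hand-coded linearizer, optionally a tensor-core one, optionally a beam-searched one, all appended to lins. A hedged sketch of how such candidates can then be compared, using time_linearizer from tinygrad.features.search (the script's actual selection code is outside the shown context):

from tinygrad.features.search import time_linearizer
# time each candidate on the real buffers and keep the fastest
best = min(lins, key=lambda lin: time_linearizer(lin, rawbufs))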
@@ -9,7 +9,7 @@ inf, nan = float('inf'), float('nan')
 # kernel unpacker
 from tinygrad.codegen.linearizer import Linearizer
 def ast_str_to_ast(ast_str:str) -> LazyOp: return eval(ast_str)
-def ast_str_to_lin(ast_str:str, opts=None): return Linearizer(ast_str_to_ast(ast_str), opts)
+def ast_str_to_lin(ast_str:str, opts=None): return Linearizer(ast_str_to_ast(ast_str), opts=opts)
 
 # load worlds, a dataset of about 12k kernels
 import gzip
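A hedged usage sketch of the kernel unpacker above; load_worlds is assumed to be the loader for the 12k-kernel dataset mentioned in the context, and the keyword call is exactly what the fixed ast_str_to_lin now guarantees:

ast_strs = load_worlds()          # assumed helper from this file
lin = ast_str_to_lin(ast_strs[0]) # wraps the eval'd LazyOp in a Linearizer
lin.linearize()                   # lower the kernel to uops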
@@ -204,7 +204,7 @@ class TestLinearizer(unittest.TestCase):
                      ConstBuffer(42, dtypes.float, ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),))))
     ast = LazyOp(BufferOps.STORE, (ast,),
                  MemBuffer(0, dtypes.float, ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),))))
-    lin = Linearizer(ast=ast) # this is a dummy ast
+    lin = Linearizer(ast) # this is a dummy ast
 
     lin.uops = UOpGraph()
     return lin.uops.add(uop, dtype, vin, arg, cachable=False)
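The test drops the ast= keyword because a *args parameter can never be filled by name; under the new signature, Linearizer(ast=ast) raises a TypeError. A toy demonstration (not the real Linearizer):

def lin(*ast, opts=None): return ast, opts

print(lin("node"))                 # (('node',), None)
try: lin(ast="node")
except TypeError as e: print(e)    # lin() got an unexpected keyword argument 'ast'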
@@ -86,11 +86,13 @@ class LinearizerOptions(NamedTuple):
   local_max: Optional[List[int]] = None
 
 class Kernel:
-  def __init__(self, ast:LazyOp, opts:Optional[LinearizerOptions]=None):
+  def __init__(self, *ast:LazyOp, opts:Optional[LinearizerOptions]=None):
     self.opts = opts or (device.compiler.linearizer_opts if isinstance(device:=Device[Device.DEFAULT], Compiled) and device.compiler is not None else
                          LinearizerOptions(Device.DEFAULT))
-    self.ast = ast
-    assert ast.op == BufferOps.STORE, f"kernels must have a store as the output, got {ast.op}"
+    assert all(op.op is BufferOps.STORE for op in ast), f"kernels must have stores as the output, got {ast}"
+    assert len(set(op.arg.st.size for op in ast)) == 1, f"all outbufs should have the same size, got {[op.arg.st for op in ast]}"
+    assert len(ast) == 1, "max one output per kernel"
+    self.ast = ast[0]
 
     # fetch lazyop info
     self.info: FlopCounter = get_lazyop_info(self.ast)
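The new asserts spell out the multi-output contract: every output is a STORE, all output buffers share one size, and, for now, exactly one output is accepted. A toy reproduction of the guards (stand-in types, not tinygrad's):

class Store:
  def __init__(self, size): self.op, self.size = "STORE", size

def check(*ast):
  assert all(o.op == "STORE" for o in ast), "kernels must have stores as the output"
  assert len(set(o.size for o in ast)) == 1, "all outbufs should have the same size"
  assert len(ast) == 1, "max one output per kernel"

check(Store(16))                       # passes
try: check(Store(16), Store(16))
except AssertionError as e: print(e)   # max one output per kernel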
@@ -248,16 +248,16 @@ class Compiled:
       from tinygrad.features.graph import print_tree
       print_tree(ast)
     from tinygrad.codegen.linearizer import Linearizer
-    k = Linearizer(ast, self.compiler.linearizer_opts)
+    k = Linearizer(ast, opts=self.compiler.linearizer_opts)
     k.required_optimizations()
     if not NOOPT:
       if not (used_tensor_cores:=k.apply_tensor_cores(getenv("TC", 1))): k.hand_coded_optimizations()
       if BEAM >= 1:
         lins = [(("tc" if used_tensor_cores else "hc"), k)]
         if used_tensor_cores:
-          lins.append(("hc", Linearizer(ast, self.compiler.linearizer_opts)))
+          lins.append(("hc", Linearizer(ast, opts=self.compiler.linearizer_opts)))
           lins[-1][1].hand_coded_optimizations()
-        kb = Linearizer(ast, self.compiler.linearizer_opts)
+        kb = Linearizer(ast, opts=self.compiler.linearizer_opts)
         kb.required_optimizations()
         from tinygrad.features.search import beam_search, time_linearizer, bufs_from_lin
         test_rawbuffers = bufs_from_lin(kb) # allocate scratch buffers for optimization