get_linearizer_actions does not return illegal actions (#2287)

* fix some linearizer failures

* linter happy

* no new test class
Author: nimlgen
Date: 2023-11-13 19:48:54 +03:00
Committed by: GitHub
Parent: 53c5baa8b6
Commit: 960535dfb8
2 changed files with 16 additions and 5 deletions
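
In short: Kernel.apply_opt gains legality assertions (LOCAL, GROUP, and GROUPTOP now refuse to run with tensor cores in flight, and NOLOCALS requires a backend that has locals at all), and the linearizer-failure tests are rewritten around a helper_test_lin helper that expects the fuzzer to report "PASS" on platforms marked as fixed and a failure everywhere else.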

test/test_linearizer_failures.py

@@ -11,6 +11,12 @@ from tinygrad.shape.view import View
 from tinygrad.shape.symbolic import Variable
 inf, nan = float('inf'), float('nan')
+def helper_test_lin(lin, fixed_platforms):
+  if Device.DEFAULT in fixed_platforms:
+    return fuzz_linearizer(lin) == "PASS"
+  else:
+    return fuzz_linearizer(lin) != "PASS"
 class TestLinearizerFailures(unittest.TestCase):
   @unittest.skip("this is currently failing")
   def test_failure_1(self):
@@ -18,20 +24,20 @@ class TestLinearizerFailures(unittest.TestCase):
     lin = Linearizer(ast)
     prg = Device[Device.DEFAULT].to_program(lin)
-  # NOTE: test cases from fuzzer run. if you fixed something and it no longer fails, remove the test case / backend.
+  # NOTE: test cases from fuzzer run. if you fixed something and it no longer fails, add platform to fixed_platforms list in helper_test_lin().
   @unittest.skipUnless(isinstance(Device[Device.DEFAULT], Interpreted), "fails on Interpreted")
   def test_failure_2(self):
     ast = LazyOp(op=ReduceOps.MAX, src=(LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(32, 2, 111, 27), strides=(6160, 3080, 28, 1), offset=0, mask=((0, 32), (0, 2), (0, 110), (0, 27)), contiguous=False), View(shape=(32, 2, 37, 9, 2, 2), strides=(5994, 2997, 81, 3, 27, 1), offset=0, mask=None, contiguous=False))))),), arg=(32, 2, 37, 9, 1, 1))
     lin = Linearizer(ast)
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=[])
   @unittest.skipUnless(Device.DEFAULT in ["METAL", "GPU", "LLVM"], "fails on these backends")
   def test_failure_3(self):
     ast = LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(32, 8, 16, 16), strides=(2048, 256, 16, 1), offset=0, mask=None, contiguous=True),)))),), arg=(32, 8, 16, 1))
     lin = Linearizer(ast)
     # METAL: AssertionError: Error Domain=AGXMetalG13X Code=3 "Threadgroup memory size (65536) exceeds the maximum threadgroup memory allowed (32768)" UserInfo={NSLocalizedDescription=Threadgroup memory size (65536) exceeds the maximum threadgroup memory allowed (32768)}
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["LLVM"])
   @unittest.skipUnless(Device.DEFAULT in ["METAL", "LLVM"], "fails on these backends")
   def test_failure_4(self):
@@ -39,14 +45,14 @@ class TestLinearizerFailures(unittest.TestCase):
     lin = Linearizer(ast)
     # related to OptOps.NOLOCALS
     # IndexError: list index out of range
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["LLVM"])
   @unittest.skipUnless(Device.DEFAULT in ["CLANG", "LLVM"], "fails on these backends")
   def test_failure_5(self):
     ast = LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=0.1464405059814453, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)))), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=1.0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=0.1464405059814453, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)))), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=1.0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None)), arg=None),), arg=(1, 1, 1, 1, 1, 1, 1, 1))
     # EXEC_ERROR, it has no global_size
     lin = Linearizer(ast)
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["CLANG", "LLVM"])
 if __name__ == '__main__':
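
For context, helper_test_lin flips the fuzzer expectation per platform: on a backend listed in fixed_platforms the fuzzer must now report "PASS", everywhere else it must still report a failure. A hypothetical new test case would follow the sketch below (the test name, ast, and platform list are made up; fuzz_linearizer is assumed to return a status string, as the comparisons above imply):

  # Hypothetical example of the new pattern, not part of this commit.
  def test_failure_6(self):
    ast = ...  # a LazyOp captured from a fuzzer run
    lin = Linearizer(ast)
    # must fuzz to "PASS" on METAL, must still fail on every other backend
    assert helper_test_lin(lin, fixed_platforms=["METAL"])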

tinygrad/codegen/kernel.py

@@ -408,20 +408,24 @@ class Kernel:
    else:
      amt = -1
    if opt.op == OptOps.LOCAL: # cyan
      assert self.opts.has_local, "target does not support local"
      assert axis < self.first_reduce, "can't local a reduce"
      assert not(self.tensor_core), "can't local with tensor cores"
      self.shift_to(axis, amt, insert_before=self.first_reduce)
      self.local_dims += 1
    elif opt.op == OptOps.LASTLOCAL: # cyan
      assert self.opts.has_local, "target does not support local"
      assert axis < self.first_reduce, "can't local a reduce"
      self.shift_to(axis, amt, insert_before=self.first_reduce-self.local_dims)
      self.local_dims += 1
    elif opt.op == OptOps.GROUP: # green
      assert self.opts.has_local and self.opts.has_shared, "target does not support local or shared mem"
      assert axis >= self.first_reduce + len(self.group_for_reduce) and axis < self.shape_len-self.upcasted, "must be reduce axis to group"
      assert not(self.tensor_core), "can't group with tensor cores"
      self.shift_to(axis, amt, insert_before=self.first_reduce + len(self.group_for_reduce))
      self.group_for_reduce.append(amt)
    elif opt.op == OptOps.GROUPTOP: # green
      assert self.opts.has_local and self.opts.has_shared, "target does not support local or shared mem"
      assert axis >= self.first_reduce + len(self.group_for_reduce) and axis < self.shape_len-self.upcasted, "must be reduce axis to group"
      assert not(self.tensor_core), "can't group with tensor cores"
      self.shift_to(axis, amt, top=True, insert_before=self.first_reduce + len(self.group_for_reduce))
@@ -445,6 +449,7 @@ class Kernel:
      self.shift_to(axis, amt, insert_before=self.first_reduce + len(self.group_for_reduce))
      self.group_for_reduce.append(amt)
    elif opt.op == OptOps.NOLOCALS:
      assert self.opts.has_local, "target does not support local, so this optimization is meaningless"
      assert self.local_dims == 0 and len(self.group_for_reduce) == 0, "can't have no locals with locals"
      assert not self.dont_use_locals, "already not using locals"
      self.dont_use_locals = True
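
These assertions are what make the commit title true: the action search tries each candidate optimization on a scratch copy of the kernel and drops any that raise, so an opt that is illegal for the target (or with tensor cores in flight) is never returned. A minimal sketch of that filtering step, assuming a copy()-able kernel; this is an illustration of the mechanism, not the actual get_linearizer_actions code:

# Illustration only: filter candidate opts through the new apply_opt asserts.
def get_legal_actions(lin, candidate_opts):
  legal = []
  for opt in candidate_opts:
    test_lin = lin.copy()      # never mutate the kernel being searched
    try:
      test_lin.apply_opt(opt)  # raises AssertionError for illegal opts
      legal.append(opt)
    except AssertionError:
      continue                 # e.g. "can't group with tensor cores"
  return legal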