get_linearizer_actions does not return illegal actions (#2287)

* fix some linearizer failures

* linter happy

* no new test class
Author: nimlgen
Date: 2023-11-13 19:48:54 +03:00
Committed by: GitHub
Parent: 53c5baa8b6
Commit: 960535dfb8
2 changed files with 16 additions and 5 deletions
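
In short: Kernel.apply_opt gains legality assertions (LOCAL, GROUP, and GROUPTOP now refuse to run with tensor cores in flight, and NOLOCALS requires a backend that has locals at all), and the linearizer-failure tests are rewritten around a helper_test_lin helper that expects the fuzzer to report "PASS" on platforms marked as fixed and a failure everywhere else.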

test/test_linearizer_failures.py

@@ -11,6 +11,12 @@ from tinygrad.shape.view import View
 from tinygrad.shape.symbolic import Variable
 inf, nan = float('inf'), float('nan')
+def helper_test_lin(lin, fixed_platforms):
+  if Device.DEFAULT in fixed_platforms:
+    return fuzz_linearizer(lin) == "PASS"
+  else:
+    return fuzz_linearizer(lin) != "PASS"
 class TestLinearizerFailures(unittest.TestCase):
   @unittest.skip("this is currently failing")
   def test_failure_1(self):
@@ -18,20 +24,20 @@ class TestLinearizerFailures(unittest.TestCase):
     lin = Linearizer(ast)
     prg = Device[Device.DEFAULT].to_program(lin)
-  # NOTE: test cases from fuzzer run. if you fixed something and it no longer fails, remove the test case / backend.
+  # NOTE: test cases from fuzzer run. if you fixed something and it no longer fails, add platform to fixed_platforms list in helper_test_lin().
   @unittest.skipUnless(isinstance(Device[Device.DEFAULT], Interpreted), "fails on Interpreted")
   def test_failure_2(self):
     ast = LazyOp(op=ReduceOps.MAX, src=(LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(32, 2, 111, 27), strides=(6160, 3080, 28, 1), offset=0, mask=((0, 32), (0, 2), (0, 110), (0, 27)), contiguous=False), View(shape=(32, 2, 37, 9, 2, 2), strides=(5994, 2997, 81, 3, 27, 1), offset=0, mask=None, contiguous=False))))),), arg=(32, 2, 37, 9, 1, 1))
     lin = Linearizer(ast)
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=[])
   @unittest.skipUnless(Device.DEFAULT in ["METAL", "GPU", "LLVM"], "fails on these backends")
   def test_failure_3(self):
     ast = LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(32, 8, 16, 16), strides=(2048, 256, 16, 1), offset=0, mask=None, contiguous=True),)))),), arg=(32, 8, 16, 1))
     lin = Linearizer(ast)
     # METAL: AssertionError: Error Domain=AGXMetalG13X Code=3 "Threadgroup memory size (65536) exceeds the maximum threadgroup memory allowed (32768)" UserInfo={NSLocalizedDescription=Threadgroup memory size (65536) exceeds the maximum threadgroup memory allowed (32768)}
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["LLVM"])
   @unittest.skipUnless(Device.DEFAULT in ["METAL", "LLVM"], "fails on these backends")
   def test_failure_4(self):
@@ -39,14 +45,14 @@ class TestLinearizerFailures(unittest.TestCase):
     lin = Linearizer(ast)
     # related to OptOps.NOLOCALS
     # IndexError: list index out of range
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["LLVM"])
   @unittest.skipUnless(Device.DEFAULT in ["CLANG", "LLVM"], "fails on these backends")
   def test_failure_5(self):
     ast = LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=0.1464405059814453, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)))), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=1.0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=0.1464405059814453, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)))), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=1.0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None)), arg=None),), arg=(1, 1, 1, 1, 1, 1, 1, 1))
     # EXEC_ERROR, it has no global_size
     lin = Linearizer(ast)
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["CLANG", "LLVM"])
 if __name__ == '__main__':
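
For context, helper_test_lin flips the fuzzer expectation per platform: on a backend listed in fixed_platforms the fuzzer must now report "PASS", everywhere else it must still report a failure. A hypothetical new test case would follow the sketch below (the test name, ast, and platform list are made up; fuzz_linearizer is assumed to return a status string, as the comparisons above imply):

  # Hypothetical example of the new pattern, not part of this commit.
  def test_failure_6(self):
    ast = ...  # a LazyOp captured from a fuzzer run
    lin = Linearizer(ast)
    # must fuzz to "PASS" on METAL, must still fail on every other backend
    assert helper_test_lin(lin, fixed_platforms=["METAL"])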

tinygrad/codegen/kernel.py

@@ -408,20 +408,24 @@ class Kernel:
    else:
      amt = -1
    if opt.op == OptOps.LOCAL: # cyan
      assert self.opts.has_local, "target does not support local"
      assert axis < self.first_reduce, "can't local a reduce"
      assert not(self.tensor_core), "can't local with tensor cores"
      self.shift_to(axis, amt, insert_before=self.first_reduce)
      self.local_dims += 1
    elif opt.op == OptOps.LASTLOCAL: # cyan
      assert self.opts.has_local, "target does not support local"
      assert axis < self.first_reduce, "can't local a reduce"
      self.shift_to(axis, amt, insert_before=self.first_reduce-self.local_dims)
      self.local_dims += 1
    elif opt.op == OptOps.GROUP: # green
      assert self.opts.has_local and self.opts.has_shared, "target does not support local or shared mem"
      assert axis >= self.first_reduce + len(self.group_for_reduce) and axis < self.shape_len-self.upcasted, "must be reduce axis to group"
      assert not(self.tensor_core), "can't group with tensor cores"
      self.shift_to(axis, amt, insert_before=self.first_reduce + len(self.group_for_reduce))
      self.group_for_reduce.append(amt)
    elif opt.op == OptOps.GROUPTOP: # green
      assert self.opts.has_local and self.opts.has_shared, "target does not support local or shared mem"
      assert axis >= self.first_reduce + len(self.group_for_reduce) and axis < self.shape_len-self.upcasted, "must be reduce axis to group"
      assert not(self.tensor_core), "can't group with tensor cores"
      self.shift_to(axis, amt, top=True, insert_before=self.first_reduce + len(self.group_for_reduce))
@@ -445,6 +449,7 @@ class Kernel:
      self.shift_to(axis, amt, insert_before=self.first_reduce + len(self.group_for_reduce))
      self.group_for_reduce.append(amt)
    elif opt.op == OptOps.NOLOCALS:
      assert self.opts.has_local, "target does not support local, so this optimization is meaningless"
      assert self.local_dims == 0 and len(self.group_for_reduce) == 0, "can't have no locals with locals"
      assert not self.dont_use_locals, "already not using locals"
      self.dont_use_locals = True
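
These assertions are what make the commit title true: the action search tries each candidate optimization on a scratch copy of the kernel and drops any that raise, so an opt that is illegal for the target (or with tensor cores in flight) is never returned. A minimal sketch of that filtering step, assuming a copy()-able kernel; this is an illustration of the mechanism, not the actual get_linearizer_actions code:

# Illustration only: filter candidate opts through the new apply_opt asserts.
def get_legal_actions(lin, candidate_opts):
  legal = []
  for opt in candidate_opts:
    test_lin = lin.copy()      # never mutate the kernel being searched
    try:
      test_lin.apply_opt(opt)  # raises AssertionError for illegal opts
      legal.append(opt)
    except AssertionError:
      continue                 # e.g. "can't group with tensor cores"
  return legal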