mirror of https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 07:28:15 -05:00
get_linearizer_actions does not return illegal actions (#2287)
* fix some linearizer failures
* linter happy
* no new test class
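The change has two halves: the Kernel optimization code gains assertions so that an illegal optimization action fails loudly the moment it is applied, and the fuzzer regression tests switch to a helper_test_lin() helper that encodes per-platform expectations. A minimal sketch of how a search routine can lean on those assertions to avoid returning illegal actions (illustrative only, not the exact body of get_linearizer_actions; get_legal_actions and candidate_opts are made-up names):

from copy import deepcopy

def get_legal_actions(lin, candidate_opts):
  # probe each candidate opt on a copy so a rejected opt never corrupts `lin`
  legal = []
  for opt in candidate_opts:
    test_lin = deepcopy(lin)
    try:
      test_lin.apply_opt(opt)   # illegal opts trip the asserts added in the Kernel hunks below
    except AssertionError:
      continue                  # drop the illegal action instead of returning it
    legal.append(test_lin)
  return legal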
@@ -11,6 +11,12 @@ from tinygrad.shape.view import View
 from tinygrad.shape.symbolic import Variable
 inf, nan = float('inf'), float('nan')
 
+def helper_test_lin(lin, fixed_platforms):
+  if Device.DEFAULT in fixed_platforms:
+    return fuzz_linearizer(lin) == "PASS"
+  else:
+    return fuzz_linearizer(lin) != "PASS"
+
 class TestLinearizerFailures(unittest.TestCase):
   @unittest.skip("this is currently failing")
   def test_failure_1(self):
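The new helper flips the expectation per backend: on a platform listed in fixed_platforms the fuzzer must report "PASS", while on every other platform the kernel is still expected to fail, so a stale test case or a silent regression gets flagged either way. A self-contained illustration of that decision logic (helper_test_lin_logic and the sample fuzzer results are made up for this note, they are not part of the diff):

def helper_test_lin_logic(fuzz_result, current_platform, fixed_platforms):
  # mirrors helper_test_lin above: fixed platforms must pass, the rest must keep failing
  if current_platform in fixed_platforms:
    return fuzz_result == "PASS"
  return fuzz_result != "PASS"

assert helper_test_lin_logic("PASS", "LLVM", fixed_platforms=["LLVM"])            # fixed backend now passes
assert helper_test_lin_logic("SOME_ERROR", "METAL", fixed_platforms=["LLVM"])     # unfixed backend still fails
assert not helper_test_lin_logic("PASS", "METAL", fixed_platforms=["LLVM"])       # unexpected pass flags a stale test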
@@ -18,20 +24,20 @@ class TestLinearizerFailures(unittest.TestCase):
     lin = Linearizer(ast)
     prg = Device[Device.DEFAULT].to_program(lin)
 
-  # NOTE: test cases from fuzzer run. if you fixed something and it no longer fails, remove the test case / backend.
+  # NOTE: test cases from fuzzer run. if you fixed something and it no longer fails, add platform to fixed_platforms list in helper_test_lin().
 
   @unittest.skipUnless(isinstance(Device[Device.DEFAULT], Interpreted), "fails on Interpreted")
   def test_failure_2(self):
     ast = LazyOp(op=ReduceOps.MAX, src=(LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(32, 2, 111, 27), strides=(6160, 3080, 28, 1), offset=0, mask=((0, 32), (0, 2), (0, 110), (0, 27)), contiguous=False), View(shape=(32, 2, 37, 9, 2, 2), strides=(5994, 2997, 81, 3, 27, 1), offset=0, mask=None, contiguous=False))))),), arg=(32, 2, 37, 9, 1, 1))
     lin = Linearizer(ast)
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=[])
 
   @unittest.skipUnless(Device.DEFAULT in ["METAL", "GPU", "LLVM"], "fails on these backends")
   def test_failure_3(self):
     ast = LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(32, 8, 16, 16), strides=(2048, 256, 16, 1), offset=0, mask=None, contiguous=True),)))),), arg=(32, 8, 16, 1))
     lin = Linearizer(ast)
     # METAL: AssertionError: Error Domain=AGXMetalG13X Code=3 "Threadgroup memory size (65536) exceeds the maximum threadgroup memory allowed (32768)" UserInfo={NSLocalizedDescription=Threadgroup memory size (65536) exceeds the maximum threadgroup memory allowed (32768)}
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["LLVM"])
 
   @unittest.skipUnless(Device.DEFAULT in ["METAL", "LLVM"], "fails on these backends")
   def test_failure_4(self):
@@ -39,14 +45,14 @@ class TestLinearizerFailures(unittest.TestCase):
     lin = Linearizer(ast)
     # related to OptOps.NOLOCALS
     # IndexError: list index out of range
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["LLVM"])
 
   @unittest.skipUnless(Device.DEFAULT in ["CLANG", "LLVM"], "fails on these backends")
   def test_failure_5(self):
     ast = LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=0.1464405059814453, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)))), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=1.0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=0.1464405059814453, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)))), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=1.0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BufferOps.MEM, src=(), arg=MemBuffer(idx=1, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 4, 1, 3, 1, 4, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None)), arg=None),), arg=(1, 1, 1, 1, 1, 1, 1, 1))
     # EXEC_ERROR, it has no global_size
     lin = Linearizer(ast)
-    assert fuzz_linearizer(lin) != "PASS"
+    assert helper_test_lin(lin, fixed_platforms=["CLANG", "LLVM"])
 
 
 if __name__ == '__main__':
@@ -408,20 +408,24 @@ class Kernel:
    else:
      amt = -1
    if opt.op == OptOps.LOCAL: # cyan
      assert self.opts.has_local, "target does not support local"
      assert axis < self.first_reduce, "can't local a reduce"
      assert not(self.tensor_core), "can't local with tensor cores"
      self.shift_to(axis, amt, insert_before=self.first_reduce)
      self.local_dims += 1
    elif opt.op == OptOps.LASTLOCAL: # cyan
      assert self.opts.has_local, "target does not support local"
      assert axis < self.first_reduce, "can't local a reduce"
      self.shift_to(axis, amt, insert_before=self.first_reduce-self.local_dims)
      self.local_dims += 1
    elif opt.op == OptOps.GROUP: # green
      assert self.opts.has_local and self.opts.has_shared, "target does not support local or shared mem"
      assert axis >= self.first_reduce + len(self.group_for_reduce) and axis < self.shape_len-self.upcasted, "must be reduce axis to group"
      assert not(self.tensor_core), "can't group with tensor cores"
      self.shift_to(axis, amt, insert_before=self.first_reduce + len(self.group_for_reduce))
      self.group_for_reduce.append(amt)
    elif opt.op == OptOps.GROUPTOP: # green
      assert self.opts.has_local and self.opts.has_shared, "target does not support local or shared mem"
      assert axis >= self.first_reduce + len(self.group_for_reduce) and axis < self.shape_len-self.upcasted, "must be reduce axis to group"
      assert not(self.tensor_core), "can't group with tensor cores"
      self.shift_to(axis, amt, top=True, insert_before=self.first_reduce + len(self.group_for_reduce))
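These asserts are what turn an illegal action into a loud failure: LOCAL refuses targets without local memory, reduce axes, and tensor cores; LASTLOCAL refuses the first two; GROUP and GROUPTOP additionally require shared memory, a reduce axis, and no tensor cores. A small standalone model of the LOCAL checks (illustrative only; the real checks are the asserts in the hunk, not this function):

def local_opt_is_legal(has_local, axis, first_reduce, tensor_core):
  # same three conditions as the OptOps.LOCAL asserts above
  if not has_local: return False             # "target does not support local"
  if axis >= first_reduce: return False      # "can't local a reduce"
  if tensor_core is not None: return False   # "can't local with tensor cores"
  return True

assert local_opt_is_legal(has_local=True, axis=0, first_reduce=2, tensor_core=None)
assert not local_opt_is_legal(has_local=True, axis=2, first_reduce=2, tensor_core=None)   # reduce axis rejected
assert not local_opt_is_legal(has_local=False, axis=0, first_reduce=2, tensor_core=None)  # no local support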
@@ -445,6 +449,7 @@ class Kernel:
      self.shift_to(axis, amt, insert_before=self.first_reduce + len(self.group_for_reduce))
      self.group_for_reduce.append(amt)
    elif opt.op == OptOps.NOLOCALS:
      assert self.opts.has_local, "target does not support local, so this optimization is meaningless"
      assert self.local_dims == 0 and len(self.group_for_reduce) == 0, "can't have no locals with locals"
      assert not self.dont_use_locals, "already not using locals"
      self.dont_use_locals = True
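Same idea for OptOps.NOLOCALS, which test_failure_4 above exercises: the opt is only legal when the target has locals at all, no local or group-for-reduce dims are in use, and it has not already been applied. A standalone model of that guard (illustrative, not tinygrad code):

def nolocals_is_legal(has_local, local_dims, group_for_reduce, dont_use_locals):
  # mirrors the three NOLOCALS asserts above
  return has_local and local_dims == 0 and len(group_for_reduce) == 0 and not dont_use_locals

assert nolocals_is_legal(True, 0, [], False)
assert not nolocals_is_legal(True, 1, [], False)   # "can't have no locals with locals"
assert not nolocals_is_legal(True, 0, [], True)    # "already not using locals"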