* add constant fold

* err, it's just zero folding

* self store fold + caching

* prints and more folds

* simpler winograd kernels

* remove childless uops
This commit is contained in:
George Hotz
2023-09-03 13:48:11 -07:00
committed by GitHub
parent e17b1af160
commit ed194a1d3b
6 changed files with 66 additions and 9 deletions

View File

@@ -55,5 +55,35 @@ class TestLinearizer(unittest.TestCase):
num_ops = len([uop for uop in k.uops if uop.uop == UOps.ALU])
assert num_ops <= 1, "more alu uops than needed"
def test_zero_fold(self):
if not isinstance(Device[Device.DEFAULT], Compiled):
self.skipTest("Only Compiled uses linearizer")
a, b = Tensor.randn(1).realize(), Tensor.randn(1).realize()
r = Tensor.stack([a, b])
ast = r.lazydata.op
r = r.realize() # realize an output buffer
k = Linearizer(ast, r.lazydata, Device[Device.DEFAULT].linearizer_opts)
k.process()
k.upcast()
k.linearize()
num_ops = len([uop for uop in k.uops if uop.uop == UOps.ALU])
assert num_ops == 0, "more alu uops than needed"
@unittest.skip("constant folding not supported yet")
def test_constant_fold(self):
if not isinstance(Device[Device.DEFAULT], Compiled):
self.skipTest("Only Compiled uses linearizer")
a, b = Tensor(2), Tensor(3)
r = a * b
ast = r.lazydata.op
r = r.realize() # realize an output buffer
k = Linearizer(ast, r.lazydata, Device[Device.DEFAULT].linearizer_opts)
k.process()
k.linearize()
num_ops = len([uop for uop in k.uops if uop.uop in [UOps.LOAD, UOps.ALU]])
assert num_ops <= 0, "more load or alu uops than needed"
if __name__ == '__main__':
unittest.main()