import unittest
import numpy as np
from tinygrad.helpers import BEAM, Timing, CI, prod
from tinygrad import Variable, Device, Tensor
from tinygrad.nn import Conv2d
from tinygrad.uop.ops import AxisType
from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.codegen.opt.postrange import Scheduler
from tinygrad.codegen.opt.search import get_kernel_actions


def rand(*shape):
  """Build a Tensor from `np.random.rand(*shape)` converted to float32."""
  values = np.random.rand(*shape)
  return Tensor(values.astype(np.float32))


def _uses_upcast_or_unroll(candidate):
  """Return True when any opt in `candidate.applied_opts` is an UPCAST or UNROLL."""
  wanted = (OptOps.UPCAST, OptOps.UNROLL)
  return any(opt.op in wanted for opt in candidate.applied_opts)


class TestBeamSearch(unittest.TestCase):
  """Tests that run with BEAM.value forced to 2 for the duration of each test."""

  def setUp(self):
    # stash the current BEAM value, then switch to 2 for the test body
    self.old_beam, BEAM.value = BEAM.value, 2

  def tearDown(self):
    # put back whatever BEAM value setUp saw
    BEAM.value = self.old_beam

  def test_variable_ast_beam(self):
    # realize an elementwise add over a slice whose length is a bound Variable
    bound = Variable("a", 1, 10).bind(3)
    sliced = rand(10, 3)[:bound]
    (sliced + 1).realize()

  def test_big_prime_number(self):
    # 367x367 matmul compared against numpy
    lhs = rand(367, 367)
    rhs = rand(367, 367)
    out = (lhs @ rhs).realize()
    got = out.numpy()
    want = lhs.numpy() @ rhs.numpy()
    np.testing.assert_allclose(got, want, atol=1e-4, rtol=1e-4)

  def test_big_prime_number_max(self):
    lhs = -rand(367, 367)
    rhs = rand(367, 367)
    # if incorrectly padded 0, the max would be 0 instead of a negative number
    out = (lhs * rhs).max(1)
    got = out.numpy()
    want = (lhs.numpy() * rhs.numpy()).max(1)
    np.testing.assert_allclose(got, want, atol=1e-4, rtol=1e-4)

  def test_big_prime_number_sum(self):
    lhs = rand(367, 367)
    rhs = rand(367, 367)
    # if incorrectly padded 0, the sum would be inf
    out = (lhs / rhs).sum(1).realize()
    got = out.numpy()
    want = (lhs.numpy() / rhs.numpy()).sum(1)
    np.testing.assert_allclose(got, want, atol=1e-4, rtol=1e-4)

  def test_variable_big_prime_number(self):
    # matmul where the contracted dimension is sliced by a Variable bound to 367
    bound = Variable("v", 1, 400).bind(367)
    lhs = rand(367, 400)
    rhs = rand(400, 367)
    out = (lhs[:, :bound] @ rhs[:bound, :]).realize()
    got = out.numpy()
    want = lhs[:, :367].numpy() @ rhs[:367, :].numpy()
    np.testing.assert_allclose(got, want, atol=1e-4, rtol=1e-4)

  def test_variable_shrink_prime_number(self):
    # shrink the first axis with a bound Variable, add one, then take a concrete slice
    bound = Variable("v", 1, 400).bind(367)
    src = rand(400, 367)
    shrunk = src.shrink(((0, bound), None))
    out = (shrunk + 1)[:367, :367].realize()
    got = out.numpy()
    want = src.numpy()[:367] + 1
    np.testing.assert_allclose(got, want, atol=1e-4, rtol=1e-4)

  def test_no_mutate_rawbuffers(self):
    # the expected value is captured before the in-place assign, then compared after it
    tensor = rand(3, 3).realize()
    desired = tensor.numpy() + 1
    tensor.assign(tensor + 1)
    actual = tensor.numpy()
    np.testing.assert_allclose(actual, desired)

  @unittest.skipIf(CI, "flaky. CL_OUT_OF_RESOURCES")
  def test_conv_beam(self):
    # time a single 3->16 channel 3x3 convolution on a 1x3x32x32 input
    conv = Conv2d(3, 16, (3, 3))
    inp = rand(1, 3, 32, 32)
    with Timing():
      conv(inp).realize()

  @unittest.skip("flaky, Fatal Python error: Floating point exception")
  def test_large_ast(self):
    acc = Tensor.rand(3, 3)
    for _ in range(5):
      for _ in range(4):
        acc = (acc + acc) * acc
    # NOTE(review): the original indentation was lost; realize is assumed to run once,
    # after both loops, so that a single large AST is built -- confirm.
    acc.realize()

  @unittest.skipUnless(Device[Device.DEFAULT].renderer.tensor_cores, "test requires tensor cores")
  def test_tc_up(self):
    # after applying a TC opt, get_kernel_actions should still offer upcast/unroll actions
    # when max_up equals the product of the existing UPCAST/UNROLL axis sizes
    tc = Device[Device.DEFAULT].renderer.tensor_cores[0]
    size = max(tc.dims[0], tc.dims[1]) * 8
    a = Tensor.rand(size, size, dtype=tc.dtype_in)
    b = Tensor.rand(size, size, dtype=tc.dtype_in)
    ast = a.matmul(b, dtype=tc.dtype_out).schedule()[-1].ast
    s = Scheduler(ast, Device[Device.DEFAULT].renderer)
    s.apply_opt(Opt(OptOps.TC, 0, (-1, 0, 1)))
    up_axes = (AxisType.UPCAST, AxisType.UNROLL)
    up = prod([dim for dim, axis_type in zip(s.full_shape, s.axis_types) if axis_type in up_axes])
    actions = get_kernel_actions(s, include_0=False, max_up=int(up))
    upcasted = [candidate for candidate in actions.values() if _uses_upcast_or_unroll(candidate)]
    assert len(upcasted) > 0, f"expected upcast/unroll actions after TC with max_up={up}, but got none"

  def test_max_up(self):
    # every action that upcasts/unrolls must have no opt arg above max_up and at least one at or below it
    ast = Tensor.rand(16, 16).schedule()[-1].ast
    s = Scheduler(ast, Device[Device.DEFAULT].renderer)
    for max_up in (2, 4):
      actions = get_kernel_actions(s, include_0=False, max_up=max_up)
      for candidate in actions.values():
        if not _uses_upcast_or_unroll(candidate):
          continue
        up_opts = candidate.applied_opts
        too_big = [opt for opt in up_opts if opt.arg > max_up]
        within = [opt for opt in up_opts if opt.arg <= max_up]
        assert len(too_big) == 0 and len(within) > 0


if __name__ == '__main__':
  unittest.main()