diff --git a/test/test_assign.py b/test/test_assign.py
index 115f1674c0..7e7ed73bc3 100644
--- a/test/test_assign.py
+++ b/test/test_assign.py
@@ -281,6 +281,62 @@ class TestAssign(unittest.TestCase):
     #assert ba1 == ba2 and ba1 != bb1
     np.testing.assert_allclose(a.numpy(), np.arange(N*N).reshape((N,N)) + np.arange(N*N).reshape((N,N)).transpose(1,0))
 
+  def test_simple_assignment_multioutput(self):
+    # three assign targets all consume the same reduce (r) and are realized in one call
+    a = Tensor.randn(32, 32).realize()
+    b = Tensor.full((32, ), 1.).contiguous().realize()
+    c = Tensor.full((32, ), 2.).contiguous().realize()
+    d = Tensor.full((32, ), 3.).contiguous().realize()
+
+    r = a.sum(axis=1)
+    b.assign(r + b)
+    c.assign(r + c)
+    d.assign(r + d)
+
+    # GlobalCounters.kernel_count must grow by exactly one across the joint realize
+    kc = GlobalCounters.kernel_count
+    Tensor.realize(b, c, d)
+    assert GlobalCounters.kernel_count - kc == 1
+    np.testing.assert_allclose(b.numpy(), a.sum(1).numpy()+1)
+    np.testing.assert_allclose(c.numpy(), a.sum(1).numpy()+2)
+    np.testing.assert_allclose(d.numpy(), a.sum(1).numpy()+3)
+
+  # NOTE: if the assign target is read/write in a single kernel, it should be contiguous
+
+  def test_permuted_assignment_correct(self):
+    a = Tensor.arange(4 * 4).reshape(4, 4).contiguous().realize()
+    b = Tensor.arange(4 * 4).reshape(4, 4).contiguous().realize()
+    # TODO: scheduler limitation, should NOT raise AssertionError from numpy.
+    # for now the block must raise RuntimeError; the numpy check presumably records the result wanted once that is lifted
+    with self.assertRaises(RuntimeError):
+      a = a.permute(1, 0)
+      new_val = a + b
+      a.assign(new_val)
+      np.testing.assert_equal(a.numpy(), np.arange(4 * 4).reshape(4, 4).transpose(1, 0) + np.arange(4 * 4).reshape(4, 4))
+
+  def test_permuted_reduceop_child_dual_use(self):
+    # b is the assign target and is also read through a permuted view in the assigned expression
+    a = Tensor.randn(32, 32, 32).realize()
+    b = Tensor.full((32, 32), 1.).contiguous().realize()
+    with self.assertRaises(RuntimeError):
+      r = a.sum(axis=1)
+      b.assign(r + b.permute(1, 0))
+      b.realize()
+
+  def test_permuted_reduceop_multioutput_dual_use(self):
+    # b is assigned to while its permuted view (b_perm) feeds a second assign built on the same reduce
+    a = Tensor.randn(32, 32, 32).realize()
+    b = Tensor.full((32, 32), 1.).contiguous().realize()
+    c = Tensor.full((32, 32), 2.).contiguous().realize()
+
+    # TODO: this is failing in cycle error, it should fail earlier.
+    with self.assertRaises(RuntimeError):
+      r = a.sum(axis=1)
+      b_perm = b.permute(1, 0)
+      b.assign(r + b)
+      c.assign(r + b_perm)
+      Tensor.realize(b, c)
+
   # TODO: is there a way to sneak in a permute such that it returns the wrong answer?
 
   @unittest.skip("don't use output buffer, and mismatch dtype no longer supported")