mirror of https://github.com/tinygrad/tinygrad.git
@@ -281,6 +281,57 @@ class TestAssign(unittest.TestCase):
    #assert ba1 == ba2 and ba1 != bb1
    np.testing.assert_allclose(a.numpy(), np.arange(N*N).reshape((N,N)) + np.arange(N*N).reshape((N,N)).transpose(1,0))

  def test_simple_assignment_multioutput(self):
    a = Tensor.randn(32, 32).realize()
    b = Tensor.full((32, ), 1.).contiguous().realize()
    c = Tensor.full((32, ), 2.).contiguous().realize()
    d = Tensor.full((32, ), 3.).contiguous().realize()

    r = a.sum(axis=1)
    b.assign(r + b)
    c.assign(r + c)
    d.assign(r + d)

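    # all three assigns read the same reduce r, so realizing b, c, and d together
    # is expected to produce a single multi-output kernel (checked via the kernel_count delta below)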
    kc = GlobalCounters.kernel_count
    Tensor.realize(b, c, d)
    assert GlobalCounters.kernel_count - kc == 1
    np.testing.assert_allclose(b.numpy(), a.sum(1).numpy()+1)
    np.testing.assert_allclose(c.numpy(), a.sum(1).numpy()+2)
    np.testing.assert_allclose(d.numpy(), a.sum(1).numpy()+3)

  # NOTE: if the assign target is read/write in a single kernel, it should be contiguous
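  # (a permuted view of the target aliases the output buffer, so a kernel that reads the view
  # while writing the target could see partially overwritten values; the tests below expect
  # the scheduler to reject such assigns with a RuntimeError)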
  def test_permuted_assignment_correct(self):
    a = Tensor.arange(4 * 4).reshape(4, 4).contiguous().realize()
    b = Tensor.arange(4 * 4).reshape(4, 4).contiguous().realize()
    # TODO: scheduler limitation, should NOT raise AssertionError from numpy.
    with self.assertRaises(RuntimeError):
      a = a.permute(1, 0)
      new_val = a + b
      a.assign(new_val)
      np.testing.assert_equal(a.numpy(), np.arange(4 * 4).reshape(4, 4).transpose(1, 0) + np.arange(4 * 4).reshape(4, 4))

  def test_permuted_reduceop_child_dual_use(self):
    a = Tensor.randn(32, 32, 32).realize()
    b = Tensor.full((32, 32), 1.).contiguous().realize()
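    # b is both the assign target and read through a permute in the same kernel (the "dual use"),
    # which the scheduler is expected to reject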
    with self.assertRaises(RuntimeError):
      r = a.sum(axis=1)
      b.assign(r + b.permute(1, 0))
      b.realize()

  def test_permuted_reduceop_multioutput_dual_use(self):
    a = Tensor.randn(32, 32, 32).realize()
    b = Tensor.full((32, 32), 1.).contiguous().realize()
    c = Tensor.full((32, 32), 2.).contiguous().realize()

    # TODO: this is failing with a cycle error, it should fail earlier.
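    # b's assign writes b while c's assign reads the pre-assign b through b_perm;
    # presumably scheduling both outputs together is what produces the cycle error noted above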
    with self.assertRaises(RuntimeError):
      r = a.sum(axis=1)
      b_perm = b.permute(1, 0)
      b.assign(r + b)
      c.assign(r + b_perm)
      Tensor.realize(b, c)

  # TODO: is there a way to sneak in a permute such that it returns the wrong answer?

  @unittest.skip("don't use output buffer, and mismatch dtype no longer supported")