mirror of https://github.com/tinygrad/tinygrad.git
@@ -281,6 +281,57 @@ class TestAssign(unittest.TestCase):
    #assert ba1 == ba2 and ba1 != bb1
    np.testing.assert_allclose(a.numpy(), np.arange(N*N).reshape((N,N)) + np.arange(N*N).reshape((N,N)).transpose(1,0))

  def test_simple_assignment_multioutput(self):
    a = Tensor.randn(32, 32).realize()
    b = Tensor.full((32, ), 1.).contiguous().realize()
    c = Tensor.full((32, ), 2.).contiguous().realize()
    d = Tensor.full((32, ), 3.).contiguous().realize()

    r = a.sum(axis=1)
    b.assign(r + b)
    c.assign(r + c)
    d.assign(r + d)

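    # all three assigns read the same reduce r, so realizing b, c, and d together
    # is expected to produce a single multi-output kernel (checked via the kernel_count delta below)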
    kc = GlobalCounters.kernel_count
    Tensor.realize(b, c, d)
    assert GlobalCounters.kernel_count - kc == 1
    np.testing.assert_allclose(b.numpy(), a.sum(1).numpy()+1)
    np.testing.assert_allclose(c.numpy(), a.sum(1).numpy()+2)
    np.testing.assert_allclose(d.numpy(), a.sum(1).numpy()+3)

  # NOTE: if the assign target is read/write in a single kernel, it should be contiguous
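  # (a permuted view of the target aliases the output buffer, so a kernel that reads the view
  # while writing the target could see partially overwritten values; the tests below expect
  # the scheduler to reject such assigns with a RuntimeError)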
  def test_permuted_assignment_correct(self):
    a = Tensor.arange(4 * 4).reshape(4, 4).contiguous().realize()
    b = Tensor.arange(4 * 4).reshape(4, 4).contiguous().realize()
    # TODO: scheduler limitation, should NOT raise AssertionError from numpy.
    with self.assertRaises(RuntimeError):
      a = a.permute(1, 0)
      new_val = a + b
      a.assign(new_val)
      np.testing.assert_equal(a.numpy(), np.arange(4 * 4).reshape(4, 4).transpose(1, 0) + np.arange(4 * 4).reshape(4, 4))

  def test_permuted_reduceop_child_dual_use(self):
    a = Tensor.randn(32, 32, 32).realize()
    b = Tensor.full((32, 32), 1.).contiguous().realize()
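    # b is both the assign target and read through a permute in the same kernel (the "dual use"),
    # which the scheduler is expected to reject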
    with self.assertRaises(RuntimeError):
      r = a.sum(axis=1)
      b.assign(r + b.permute(1, 0))
      b.realize()

  def test_permuted_reduceop_multioutput_dual_use(self):
    a = Tensor.randn(32, 32, 32).realize()
    b = Tensor.full((32, 32), 1.).contiguous().realize()
    c = Tensor.full((32, 32), 2.).contiguous().realize()

    # TODO: this is failing with a cycle error, it should fail earlier.
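    # b's assign writes b while c's assign reads the pre-assign b through b_perm;
    # presumably scheduling both outputs together is what produces the cycle error noted above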
    with self.assertRaises(RuntimeError):
      r = a.sum(axis=1)
      b_perm = b.permute(1, 0)
      b.assign(r + b)
      c.assign(r + b_perm)
      Tensor.realize(b, c)

  # TODO: is there a way to sneak in a permute such that it returns the wrong answer?

  @unittest.skip("don't use output buffer, and mismatch dtype no longer supported")