simplify tensors before scheduling [pr] (#8580)

* delete forced_realize

* put that back

* work

* remove forced_realize

* expectedFailures

* contiguous(buffer)

* multi

* expectedFailures

* cleaner create_subbuffer

* more comments

* remove that

* note

* realizes

* work

* one upat and image is back

* remove

* cleaner

* fix test_complex_backward for now

---------

Co-authored-by: George Hotz <geohot@gmail.com>
This commit is contained in:
qazal
2025-01-20 16:42:42 -05:00
committed by GitHub
parent 02ad450e22
commit 08eb1f1f56
3 changed files with 30 additions and 56 deletions

View File

@@ -220,7 +220,7 @@ class TestSchedule(unittest.TestCase):
GlobalCounters.reset()
expr = (a*b)/b
expr.realize()
self.assertEqual(GlobalCounters.kernel_count, 1)
self.assertEqual(GlobalCounters.kernel_count, 0) # the scheduler can fold divs now!
self.assertEqual(GlobalCounters.global_ops, 0)
np.testing.assert_allclose(expr.numpy(), np.full((4,), 4.0))
@@ -229,7 +229,7 @@ class TestSchedule(unittest.TestCase):
GlobalCounters.reset()
expr = a/a
expr.realize()
self.assertEqual(GlobalCounters.kernel_count, 1)
self.assertEqual(GlobalCounters.kernel_count, 0)
self.assertEqual(GlobalCounters.global_ops, 0)
np.testing.assert_allclose(expr.numpy(), np.full((4,), 1.0))
@@ -2204,7 +2204,7 @@ class TestConst(unittest.TestCase):
sched = add.schedule()
self.assertEqual(len(sched), 0)
# b+0 and b share the same underlying device memory
self.assertIs(add.lazydata.realized, b.lazydata.realized)
self.assertIs(add.lazydata.buffer, b.lazydata.buffer)
self.assertListEqual(add.tolist(), [2, 2, 2, 2])
def test_src_masked_const_folding(self):