Mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-04-07 03:00:26 -04:00.
scheduler deduping spec and asserts [pr] (#8307)
Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
This commit is contained in:
@@ -2,7 +2,6 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from tinygrad import dtypes, Tensor, TinyJit, GlobalCounters, Variable
|
||||
from tinygrad.engine.schedule import create_schedule
|
||||
|
||||
N = 200 # has to be bigger than the cache to fail
|
||||
|
||||
@@ -168,16 +167,6 @@ class TestAssign(unittest.TestCase):
|
||||
a += 1
|
||||
np.testing.assert_allclose(a.numpy(), 3)
|
||||
|
||||
# NOTE: this is similar to the resnet failure
|
||||
#@unittest.expectedFailure
|
||||
def test_double_assign_alt(self):
|
||||
a = Tensor.ones(4, dtype=dtypes.int).contiguous().realize()
|
||||
b = Tensor([1, 2, 3, 4], dtype=dtypes.int).realize().lazydata
|
||||
a1 = a.lazydata.assign(b)
|
||||
a2 = a.lazydata.assign(b)
|
||||
sched = create_schedule([a1, a2])
|
||||
self.assertEqual(len(sched), 1)
|
||||
|
||||
def test_crossover_assign(self):
|
||||
a = Tensor.full((4,), 2).contiguous().realize()
|
||||
b = Tensor.full((4,), 3).contiguous().realize()
|
||||
|
||||
@@ -212,6 +212,41 @@ class TestSchedule(unittest.TestCase):
|
||||
reduceops = [x for si in schedule for x in si.ast.toposort if x.op is Ops.REDUCE_AXIS]
|
||||
assert len(reduceops) == 2
|
||||
|
||||
def test_dedup_assign(self):
|
||||
a = Tensor.ones(4).contiguous().realize()
|
||||
b = Tensor.full((4,), 2.).contiguous()
|
||||
first = a.assign(b)
|
||||
second = a.assign(b)
|
||||
check_schedule([first, second], 1)
|
||||
|
||||
# NOTE: this is causing "LAZYCACHE=1 incorrectly reuses contiguous const" #4562
|
||||
# should contiguous dedup?
|
||||
def test_dedup_contiguous(self):
|
||||
a = Tensor.ones(4).contiguous()
|
||||
b = Tensor.ones(4).contiguous()
|
||||
sched = check_schedule([a, b], 1)
|
||||
run_schedule(sched)
|
||||
# a and b share the same underlying device memory
|
||||
self.assertIs(a.lazydata.realized, b.lazydata.realized)
|
||||
|
||||
# EMPTY and COPY are assigned to unique device Buffers
|
||||
|
||||
def test_no_dedup_copy(self):
|
||||
src = Tensor.ones(4).contiguous().realize()
|
||||
a = src.clone()
|
||||
b = src.clone()
|
||||
sched = check_schedule([a, b], 2, filter_sink=False)
|
||||
run_schedule(sched)
|
||||
# a and b are assigned to different device Buffers
|
||||
self.assertIsNot(a.lazydata.realized, b.lazydata.realized)
|
||||
|
||||
def test_no_dedup_empty(self):
|
||||
a = Tensor.empty((4,))
|
||||
b = Tensor.empty((4,))
|
||||
sched = check_schedule([a, b], 2, filter_sink=False)
|
||||
run_schedule(sched)
|
||||
self.assertIsNot(a.lazydata.realized, b.lazydata.realized)
|
||||
|
||||
def test_fold_double_unary(self):
|
||||
y = Tensor.empty(2)
|
||||
out = y.sum(keepdim=True).sqrt().__neg__()
|
||||
|
||||
Reference in New Issue
Block a user