realized base tensors become RESHAPE(BUFFER) [pr] (#8994)

qazal
2025-02-10 10:17:54 +01:00
committed by GitHub
parent 910ae260cd
commit fd9f9ec772
5 changed files with 17 additions and 16 deletions


@@ -2496,7 +2496,7 @@ class TestUOpBecome(unittest.TestCase):
     # NOTE: realized base is always a flat buffer
     assert UPat(Ops.BUFFER).match(add.lazydata.base, {})
     # the Tensor UOp can optionally stack a VIEW on top of BUFFER
-    assert UPat(Ops.VIEW, src=(UPat(Ops.BUFFER),)).match(add.lazydata, {})
+    assert UPat(Ops.RESHAPE, src=(UPat(Ops.BUFFER),)).match(add.lazydata, {})

   def test_new_buffer_view(self):
     a = Tensor.empty(4, 4)
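
Restated outside the test harness, the new invariant is easy to check. A minimal sketch, assuming a tinygrad checkout at this commit (Tensor, UPat, Ops, and lazydata are the same names the diff uses):

    from tinygrad import Tensor
    from tinygrad.ops import UPat, Ops

    # realize a base tensor whose shape differs from the flat buffer shape
    add = (Tensor.empty(4, 4) + Tensor.empty(4, 4)).realize()
    # the realized base is always a flat BUFFER...
    assert UPat(Ops.BUFFER).match(add.lazydata.base, {})
    # ...and the tensor UOp now stacks a RESHAPE, not a VIEW, on top of it
    assert UPat(Ops.RESHAPE, src=(UPat(Ops.BUFFER),)).match(add.lazydata, {})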


@@ -553,7 +553,7 @@ class TestShapeSpec(unittest.TestCase):
   def test_assign_flat(self):
     buffer = Tensor.arange(4).realize()
     a = buffer.assign(Tensor.zeros((4,), dtype=dtypes.int))
-    assign_pattern = UPat(Ops.ASSIGN, src=(UPat(Ops.VIEW, src=(UPat(Ops.BUFFER),)), UPat()))
+    assign_pattern = UPat(Ops.ASSIGN, src=(UPat(Ops.BUFFER), UPat()))
     assert assign_pattern.match(a.lazydata, {})
     a.realize()
     self.assertEqual(buffer.tolist(), [0, 0, 0, 0])
@@ -567,7 +567,7 @@ class TestShapeSpec(unittest.TestCase):
   def test_assign_reshaped(self):
     buffer = Tensor.ones((4,)).contiguous().realize()
     a = buffer.reshape((2, 2)).assign(Tensor.zeros((2, 2)))
-    assign_pattern = UPat(Ops.ASSIGN, src=(UPat(Ops.RESHAPE, src=(UPat(Ops.VIEW, src=(UPat(Ops.BUFFER),),))), UPat()))
+    assign_pattern = UPat(Ops.ASSIGN, src=(UPat(Ops.RESHAPE, src=(UPat(Ops.BUFFER))), UPat()))
     assert assign_pattern.match(a.lazydata, {})
     a.realize()
     self.assertEqual(buffer.tolist(), [0, 0, 0, 0])
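
The two assign tests differ only in whether the target still has the buffer's flat shape: a same-shape reshape folds away, so only the (2, 2) target keeps a RESHAPE node. A condensed sketch of both cases, same assumptions as above:

    from tinygrad import Tensor, dtypes
    from tinygrad.ops import UPat, Ops

    # flat target: shape (4,) already equals the BUFFER's shape, so no RESHAPE
    flat = Tensor.arange(4).realize().assign(Tensor.zeros((4,), dtype=dtypes.int))
    assert UPat(Ops.ASSIGN, src=(UPat(Ops.BUFFER), UPat())).match(flat.lazydata, {})

    # reshaped target: the (2, 2) view keeps a RESHAPE stacked on the BUFFER
    base = Tensor.ones((4,)).contiguous().realize()
    reshaped = base.reshape((2, 2)).assign(Tensor.zeros((2, 2)))
    assert UPat(Ops.ASSIGN, src=(UPat(Ops.RESHAPE, src=(UPat(Ops.BUFFER),)), UPat())).match(reshaped.lazydata, {})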


@@ -102,14 +102,16 @@ class TestTensorGradient(unittest.TestCase):
 class TestRealizeMeansRealize(unittest.TestCase):
   def test_randn_realizes(self):
     x = Tensor.randn(2, 3, 64, 64, requires_grad=True).realize()
-    self.assertEqual(x.lazydata.op, Ops.VIEW)
+    self.assertEqual(x.lazydata.op, Ops.RESHAPE)
     assert x.lazydata.is_realized

   #@unittest.expectedFailure
+  # update: passing after delete_forced_realize
   def test_uniform_realizes(self):
     x = Tensor.uniform(16, 3, 3, 3, requires_grad=True).realize()
-    self.assertEqual(x.lazydata.op, Ops.VIEW)
+    print(x.lazydata)
+    self.assertEqual(x.lazydata.op, Ops.RESHAPE)
     assert x.lazydata.is_realized

   # NOTE: even though it doesn't realize, this seems fine
   def test_uniform_gradient(self):
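
Why lazydata.op is RESHAPE here: the realized BUFFER is flat, and any tensor whose shape differs from that flat (numel,) shape keeps a RESHAPE stacked on it. A sketch under the same assumptions:

    from tinygrad import Tensor
    from tinygrad.ops import Ops

    x = Tensor.randn(2, 3, 64, 64).realize()
    assert x.lazydata.op is Ops.RESHAPE  # the 4-D shape differs from the flat buffer shape
    assert x.lazydata.is_realized        # is_realized looks at the BUFFER base underneath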


@@ -4,8 +4,8 @@ from tinygrad.ops import UPat, Ops, UOp

 # NOTE: unlike before base for a realized tensor is always a BUFFER
 realized_pattern = UPat(Ops.BUFFER)
-# after realization, tensor uops become VIEW(BUFFER)
-buffer_view_pattern = UPat(Ops.VIEW, src=(UPat(Ops.BUFFER),))
+# after realization, base tensor uops become RESHAPE(BUFFER)
+buffer_view_pattern = UPat(Ops.RESHAPE, src=(UPat(Ops.BUFFER),))
 const_pattern = UPat(Ops.CONST, src=(UPat(Ops.VIEW, src=(UPat(Ops.DEVICE),),)))
 def is_pattern_uop(u:UOp, pat:UPat): assert pat.match(u, {}), f"{u}\nis not\n{pat}"
 def is_pattern(ten:Tensor, pat:UPat): is_pattern_uop(ten.lazydata, pat)
@@ -33,8 +33,8 @@ class TestTensorMutates(unittest.TestCase):
     is_pattern_uop(d.lazydata.base, realized_pattern)
     is_pattern_uop(c.lazydata.base, realized_pattern)
     # NOTE: we keep movement ops on top of the buffer view
-    is_pattern_uop(c.lazydata, buffer_view_pattern)
-    is_pattern_uop(d.lazydata, UPat(Ops.RESHAPE, src=(buffer_view_pattern,)))
+    is_pattern_uop(c.lazydata, UPat(Ops.BUFFER))
+    is_pattern_uop(d.lazydata, UPat(Ops.RESHAPE, src=(realized_pattern,)))

   def test_reshape_is_same_child(self):
     a = Tensor([1,2,3])
@@ -56,8 +56,7 @@ class TestTensorUopRepresentation(unittest.TestCase):
     b = Tensor([4.,5,6]).realize()
     c = a+b
     print(c.lazydata)
-    #is_pattern(c, UPat(Ops.ADD, src=(realized_pattern, realized_pattern)))
-    is_pattern(c, UPat(Ops.ADD, src=(UPat(Ops.VIEW, src=(realized_pattern,)), UPat(Ops.VIEW, src=(realized_pattern,)))))
+    is_pattern(c, UPat(Ops.ADD, src=(realized_pattern, realized_pattern)))

   def test_const_pattern(self):
     a = Tensor(1)
@@ -114,9 +113,7 @@ class TestTensorUopRepresentation(unittest.TestCase):
     a = Tensor([1.,2,3]).realize()
     c = a.to("TEST") # NOTE: this isn't checked
     print(c.lazydata)
-    # TODO: COPY on a Tensor becomes a VIEW(COPY), this should be done in the scheduler not in ops
-    #is_pattern(c, UPat(Ops.COPY, src=(UPat(Ops.DEVICE), realized_pattern,)))
-    is_pattern(c, UPat(Ops.VIEW, src=(UPat(Ops.COPY, src=(UPat(Ops.DEVICE), realized_pattern,)),)))
+    is_pattern(c, UPat(Ops.COPY, src=(UPat(Ops.DEVICE), realized_pattern,)))

   def test_empty_buf(self):
     a = Tensor.empty(3, 3)
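
The net effect of these pattern updates: realized inputs now appear in downstream graphs as bare BUFFER UOps, with no VIEW wrapper to peel off. A sketch reusing the realized_pattern helper defined in this file, same assumptions as above:

    from tinygrad import Tensor
    from tinygrad.ops import UPat, Ops

    realized_pattern = UPat(Ops.BUFFER)
    a = Tensor([1., 2., 3.]).realize()
    b = Tensor([4., 5., 6.]).realize()
    c = a + b
    # the realized 1-D inputs show up as bare BUFFERs in the ADD's sources
    assert UPat(Ops.ADD, src=(realized_pattern, realized_pattern)).match(c.lazydata, {})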


@@ -420,7 +420,7 @@ def create_schedule_with_vars(big_sink:UOp) -> tuple[list[ScheduleItem], dict[Variable, int]]:
     # NOOP
     if k.base is v.base: continue
     # NOTE: only the base tensors get a BUFFER UOp
-    if v.is_realized and k is k.base: becomes_map[k] = v.view(unwrap(k.st))
+    if v.is_realized and k is k.base: becomes_map[k] = v.reshape(k.shape)
     # otherwise if it simplified to a CONST the UOp just becomes that CONST
     elif v.op is Ops.CONST and all_int(v.shape): becomes_map[k] = v
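
This one-line scheduler change drives all the test updates above: a realized base k is rewritten to v.reshape(k.shape) instead of the ShapeTracker-carrying v.view(unwrap(k.st)), and a reshape to an unchanged shape folds away entirely. A sketch of that folding, inferred from the updated tests rather than stated in the diff:

    from tinygrad import Tensor
    from tinygrad.ops import Ops

    buf = Tensor.arange(4).realize().lazydata.base  # the realized flat BUFFER UOp
    assert buf.reshape((4,)).op is Ops.BUFFER       # same-shape reshape folds to the buffer itself
    assert buf.reshape((2, 2)).op is Ops.RESHAPE    # a real reshape stacks a RESHAPE node
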
@@ -439,7 +439,9 @@ def create_schedule_with_vars(big_sink:UOp) -> tuple[list[ScheduleItem], dict[Variable, int]]:

   # map buffers to realized tensors
   for buf_uop in realize_map:
-    for tensor_uop in buf_tensors[buf_uop]: becomes_map[tensor_uop] = buf_uop.view(unwrap(tensor_uop.st))
+    for tensor_uop in buf_tensors[buf_uop]:
+      # ASSIGN just becomes the buffer in source, otherwise we reshape the buffer
+      becomes_map[tensor_uop] = tensor_uop.src[0] if tensor_uop.op is Ops.ASSIGN else buf_uop.reshape(tensor_uop.shape)
     buf_uop.buffer.ref(1)

   # create kernels, TODO: this should use the SINK from tensor_map
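
Why ASSIGN is special-cased: an assigned tensor should keep pointing at the buffer it wrote into, not at a fresh reshape of it. A sketch of the observable behavior, mirroring test_assign_flat above; the post-realize lazydata.op check is inferred from this new branch, not copied from a test:

    from tinygrad import Tensor, dtypes
    from tinygrad.ops import Ops

    buffer = Tensor.arange(4).realize()
    a = buffer.assign(Tensor.zeros((4,), dtype=dtypes.int))
    a.realize()
    # the scheduled ASSIGN was replaced by its target buffer (src[0])
    assert a.lazydata.op is Ops.BUFFER
    assert buffer.tolist() == [0, 0, 0, 0]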