think about removing realize from lazybuffer (#1965)

* remove realize from lazybuffer

* okay fine, back that off

* fix tests maybe

* fix test
George Hotz committed 2023-10-04 07:18:58 -07:00 (committed by GitHub)
parent 2ea1dd3e87
commit d449b3bef1
5 changed files with 14 additions and 17 deletions


@@ -18,7 +18,7 @@ class TestLazyBuffer(unittest.TestCase):
     b = LazyBuffer.fromCPU(a).realize()
     #assert b.st.contiguous == a.flags.c_contiguous
     assert b.st.shape == a.shape
-    np.testing.assert_equal(a, b.toCPU())
+    np.testing.assert_equal(a, Tensor(b).numpy())
     for ndims in range(1, 4):
       a = np.random.randn(*(4,)*ndims).astype(np.float32)
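
The test above no longer calls toCPU() on the LazyBuffer directly; it wraps the realized buffer in a Tensor and reads it back through the Tensor API. A minimal sketch of that round trip, assuming a tinygrad checkout at this commit:

import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.lazy import LazyBuffer

a = np.random.randn(4, 4).astype(np.float32)
b = LazyBuffer.fromCPU(a).realize()             # realize the buffer on the CPU backend
np.testing.assert_equal(a, Tensor(b).numpy())   # host read-back now goes through Tensor.numpy()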


@@ -4,7 +4,7 @@ from typing import Callable, Optional, Tuple, Union, List, Dict, Any, cast, Mapp
 from weakref import ref, WeakSet, WeakValueDictionary
 import numpy as np
-from tinygrad.helpers import prod, getenv, DType, dtypes, flatten, ImageDType, partition, all_int, dedup, merge_dicts
+from tinygrad.helpers import prod, getenv, DType, dtypes, flatten, ImageDType, partition, dedup, merge_dicts
 from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps, ReduceOps, MovementOps, LoadOps, OpType, LazyOp, MemBuffer, ConstBuffer, BufferOps
 from tinygrad.shape.shapetracker import ShapeTracker, get_contraction
 from tinygrad.shape.symbolic import Variable, sint
@@ -223,15 +223,6 @@ class LazyBuffer:
   def fromCPU(x: np.ndarray) -> LazyBuffer:
     return LazyBuffer("CPU", ShapeTracker.from_shape(x.shape), LoadOps, None, dtypes.from_np(x.dtype), {}, RawNumpyBuffer.fromCPU(x))
-  def prepare_transfer(self):
-    self_casted = self.e(UnaryOps.CAST, arg=(dtypes.from_np(self.dtype.np), False)) if dtypes.from_np(self.dtype.np) != self.dtype else self
-    return self_casted.contiguous().realize().realized
-  def toCPU(self) -> np.ndarray:
-    assert self.dtype.np, f"{self.dtype} is not supported in toCPU"
-    assert all_int(self.shape), f"no toCPU if shape is symbolic, {self.shape=}"
-    return cast(RawBuffer, self.prepare_transfer()).toCPU().reshape(self.shape)
   # *** elementwise ops ***
   def e(self:LazyBuffer, op:Union[UnaryOps, BinaryOps, TernaryOps], *srcs:LazyBuffer, arg:Optional[Any]=None) -> LazyBuffer:
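
The removed prepare_transfer()/toCPU() pair was LazyBuffer's only host-facing code: cast to the numpy-compatible dtype if needed, force a contiguous layout, realize, then read the raw buffer back and reshape. A hedged sketch of that removed behaviour as a standalone helper (lazybuffer_to_cpu is a hypothetical name; it only uses calls visible in this diff):

import numpy as np
from tinygrad.helpers import dtypes, all_int
from tinygrad.ops import UnaryOps

def lazybuffer_to_cpu(lb) -> np.ndarray:
  assert lb.dtype.np is not None, f"{lb.dtype} has no numpy equivalent"
  assert all_int(lb.shape), f"no CPU read-back if shape is symbolic, {lb.shape=}"
  np_dtype = dtypes.from_np(lb.dtype.np)
  # cast if the on-device dtype differs from its numpy counterpart, then force contiguity and realize
  casted = lb.e(UnaryOps.CAST, arg=(np_dtype, False)) if np_dtype != lb.dtype else lb
  raw = casted.contiguous().realize().realized    # the RawBuffer backing the realized LazyBuffer
  return raw.toCPU().reshape(lb.shape)

The same cast/contiguous/realize sequence now lives in Tensor.numpy() (see the tensor.py hunk below), so LazyBuffer no longer needs to know how to produce a numpy array.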


@@ -47,16 +47,19 @@ def _realize_contiguous(buffer: LazyBuffer, src: LazyBuffer) -> None:
   assert buffer.dtype == src.dtype, f"contiguous dtype mismatch, expecting {buffer.dtype}, got {src.dtype}"
 def _realize_from(buffer: LazyBuffer, src: LazyBuffer) -> None:
   assert src.realized.size == buffer.st.size(), f"size mismatch on FROM {src.realized.size} != {buffer.st.size()}"
+  assert src.st.contiguous and buffer.st.contiguous, "all must be contiguous for from"
   if DEBUG >= 3: print(f"*** copy {buffer.device} <- {src.device} size {src.realized.size} dtype {src.realized.dtype}")
   # TODO: make this generic
   if isinstance(src.realized, RawDiskBuffer) and issubclass(Device[buffer.device].buffer, RawBufferMapped):
     assert all_int(buffer.shape), "does not support symbolic shape"
     buffer.realized = Device[buffer.device].buffer(prod(buffer.shape), buffer.dtype, **buffer._device_extra_args())
-    src.prepare_transfer().readinto(cast(RawBufferMapped, buffer.realized)._buffer())
+    src.realized.readinto(cast(RawBufferMapped, buffer.realized)._buffer())
   elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and P2P >= 1:
     buffer.realized = cast(RawBufferTransfer, Device[buffer.device].buffer).transfer(src.realized, buffer.shape, buffer.dtype, **buffer._device_extra_args())
   else:
-    buffer.realized = Device[buffer.device].buffer.fromCPU(src.toCPU(), **buffer._device_extra_args())
+    # TODO: schedule this as FROM to go to CPU, and a FROM to go to device
+    buffer.realized = Device[buffer.device].buffer.fromCPU(src.realized.toCPU(), **buffer._device_extra_args())
 # *** n op LoadOps ***
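
With prepare_transfer() gone, _realize_from now expects src to already be realized and contiguous (hence the new assert), and it copies through one of three paths: a disk-to-mapped-buffer readinto, a peer-to-peer transfer when both sides support it, or a bounce through host memory. A hedged sketch of that host-bounce fallback (copy_via_host and its arguments are illustrative names, not tinygrad API):

def copy_via_host(dst_raw_buffer_cls, src_raw_buffer, **device_extra_args):
  host_array = src_raw_buffer.toCPU()                                  # device -> host numpy array
  return dst_raw_buffer_cls.fromCPU(host_array, **device_extra_args)   # host -> destination device

The TODO in the else branch notes the intended follow-up: schedule this as two FROM loads (device to CPU, then CPU to device) instead of bouncing eagerly inside _realize_from.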


@@ -31,7 +31,7 @@ class RawTorchBuffer(RawBuffer):
   def __init__(self, size:int, dtype:DType, buf:Optional[torch.Tensor]=None): super().__init__(size, dtype, buf if buf is not None else torch.empty([size], dtype=inverse_type_map[dtype]))
   @classmethod
   def fromCPU(cls, x):
-    buf = torch.from_numpy(x).requires_grad_(False).to(device)
+    buf = torch.from_numpy(x if all(s>=0 for s in x.strides) else x.copy()).requires_grad_(False).to(device)
     return cls(prod(x.shape), type_map[buf.dtype], buf)
   def toCPU(self): return self._buf.cpu().numpy()
 TorchBuffer = Interpreted(RawTorchBuffer, torch_fxn_for_op, from_underlying=lambda x: RawTorchBuffer(prod(x.shape), type_map[x.dtype], x))
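
The fromCPU change guards against numpy arrays with negative strides (views produced by operations like np.flip), which torch.from_numpy rejects; such arrays are copied into a fresh contiguous buffer first. A standalone sketch of the behaviour, assuming numpy and torch are installed:

import numpy as np
import torch

a = np.flip(np.arange(6, dtype=np.float32).reshape(2, 3), axis=1)  # a view with a negative stride
# torch.from_numpy(a) would fail here, so copy when any stride is negative, as the guard above does
safe = a if all(s >= 0 for s in a.strides) else a.copy()
t = torch.from_numpy(safe).requires_grad_(False)
print(t.shape, t.stride())  # torch.Size([2, 3]) (3, 1)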


@@ -97,7 +97,7 @@ class Tensor:
     # TODO: this is a hack for writing to DISK
     if self.device.startswith("DISK"):
       if x.__class__ is not Tensor: x = Tensor(x, device="CPU", dtype=self.dtype)
-      self.lazydata.contiguous().realize().realized._copyin(x.numpy()) # type: ignore
+      self.contiguous().realize().lazydata.realized._copyin(x.numpy()) # type: ignore
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)
     assert self.shape == x.shape and self.device == x.device, f"assign shape mismatch {self.shape} != {x.shape} or device mismatch {self.device} != {x.device}"
@@ -107,8 +107,11 @@ class Tensor:
     self.lazydata = x.lazydata
     return self
-  def detach(self): return Tensor(self.lazydata, device=self.device, requires_grad=False)
-  def numpy(self) -> np.ndarray: return self.lazydata.toCPU()
+  def detach(self) -> Tensor: return Tensor(self.lazydata, device=self.device, requires_grad=False)
+  def numpy(self) -> np.ndarray:
+    assert all_int(self.shape), f"no numpy if shape is symbolic, {self.shape=}"
+    assert self.dtype.np is not None, f"no numpy dtype for {self.dtype}"
+    return self.detach().cast(dtypes.from_np(self.dtype.np)).contiguous().to('CPU').realize().lazydata.realized._buf.reshape(self.shape)
   # TODO: if things are realized this won't work
   def to_(self, device:str):
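
Tensor.numpy() now owns the whole host read-back path: detach, cast to the numpy-compatible dtype, force a contiguous layout, move to the CPU device, realize, then reshape the realized buffer's underlying array, with the symbolic-shape and dtype asserts moved here from the old LazyBuffer.toCPU(). A hedged usage sketch, assuming a tinygrad checkout at this commit and the default device:

import numpy as np
from tinygrad.tensor import Tensor

t = Tensor.ones(3, 3) * 2          # still lazy at this point
a = t.numpy()                      # detach -> cast -> contiguous -> to('CPU') -> realize -> reshape
assert isinstance(a, np.ndarray) and a.shape == (3, 3)
np.testing.assert_equal(a, np.full((3, 3), 2.0, dtype=np.float32))

The DISK branch of assign() above makes the same move: contiguous() and realize() are called on the Tensor rather than on its lazydata before the raw _copyin.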