From d449b3bef1b5c876e846accde27792d9828c0a52 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Wed, 4 Oct 2023 07:18:58 -0700
Subject: [PATCH] think about removing realize from lazybuffer (#1965)

* remove realize from lazybuffer

* okay fine, back that off

* fix tests maybe

* fix test
---
 test/test_lazybuffer.py       |  2 +-
 tinygrad/lazy.py              | 11 +----------
 tinygrad/realize.py           |  7 +++++--
 tinygrad/runtime/ops_torch.py |  2 +-
 tinygrad/tensor.py            |  9 ++++++---
 5 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/test/test_lazybuffer.py b/test/test_lazybuffer.py
index 27f978c64b..4800a649ae 100644
--- a/test/test_lazybuffer.py
+++ b/test/test_lazybuffer.py
@@ -18,7 +18,7 @@ class TestLazyBuffer(unittest.TestCase):
       b = LazyBuffer.fromCPU(a).realize()
       #assert b.st.contiguous == a.flags.c_contiguous
       assert b.st.shape == a.shape
-      np.testing.assert_equal(a, b.toCPU())
+      np.testing.assert_equal(a, Tensor(b).numpy())
 
     for ndims in range(1, 4):
       a = np.random.randn(*(4,)*ndims).astype(np.float32)
diff --git a/tinygrad/lazy.py b/tinygrad/lazy.py
index f5dcb93281..c8446869c5 100644
--- a/tinygrad/lazy.py
+++ b/tinygrad/lazy.py
@@ -4,7 +4,7 @@ from typing import Callable, Optional, Tuple, Union, List, Dict, Any, cast, Mapping
 from weakref import ref, WeakSet, WeakValueDictionary
 import numpy as np
 
-from tinygrad.helpers import prod, getenv, DType, dtypes, flatten, ImageDType, partition, all_int, dedup, merge_dicts
+from tinygrad.helpers import prod, getenv, DType, dtypes, flatten, ImageDType, partition, dedup, merge_dicts
 from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps, ReduceOps, MovementOps, LoadOps, OpType, LazyOp, MemBuffer, ConstBuffer, BufferOps
 from tinygrad.shape.shapetracker import ShapeTracker, get_contraction
 from tinygrad.shape.symbolic import Variable, sint
@@ -223,15 +223,6 @@ class LazyBuffer:
   def fromCPU(x: np.ndarray) -> LazyBuffer:
     return LazyBuffer("CPU", ShapeTracker.from_shape(x.shape), LoadOps, None, dtypes.from_np(x.dtype), {}, RawNumpyBuffer.fromCPU(x))
 
-  def prepare_transfer(self):
-    self_casted = self.e(UnaryOps.CAST, arg=(dtypes.from_np(self.dtype.np), False)) if dtypes.from_np(self.dtype.np) != self.dtype else self
-    return self_casted.contiguous().realize().realized
-
-  def toCPU(self) -> np.ndarray:
-    assert self.dtype.np, f"{self.dtype} is not supported in toCPU"
-    assert all_int(self.shape), f"no toCPU if shape is symbolic, {self.shape=}"
-    return cast(RawBuffer, self.prepare_transfer()).toCPU().reshape(self.shape)
-
   # *** elementwise ops ***
 
   def e(self:LazyBuffer, op:Union[UnaryOps, BinaryOps, TernaryOps], *srcs:LazyBuffer, arg:Optional[Any]=None) -> LazyBuffer:
diff --git a/tinygrad/realize.py b/tinygrad/realize.py
index 19829dfbcf..9c53aa413d 100644
--- a/tinygrad/realize.py
+++ b/tinygrad/realize.py
@@ -47,16 +47,19 @@ def _realize_contiguous(buffer: LazyBuffer, src: LazyBuffer) -> None:
   assert buffer.dtype == src.dtype, f"contiguous dtype mismatch, expecting {buffer.dtype}, got {src.dtype}"
 
 def _realize_from(buffer: LazyBuffer, src: LazyBuffer) -> None:
+  assert src.realized.size == buffer.st.size(), f"size mismatch on FROM {src.realized.size} != {buffer.st.size()}"
+  assert src.st.contiguous and buffer.st.contiguous, "all must be contiguous for from"
   if DEBUG >= 3: print(f"*** copy {buffer.device} <- {src.device} size {src.realized.size} dtype {src.realized.dtype}")
   # TODO: make this generic
   if isinstance(src.realized, RawDiskBuffer) and issubclass(Device[buffer.device].buffer, RawBufferMapped):
     assert all_int(buffer.shape), "does not support symbolic shape"
     buffer.realized = Device[buffer.device].buffer(prod(buffer.shape), buffer.dtype, **buffer._device_extra_args())
-    src.prepare_transfer().readinto(cast(RawBufferMapped, buffer.realized)._buffer())
+    src.realized.readinto(cast(RawBufferMapped, buffer.realized)._buffer())
   elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and P2P >= 1:
     buffer.realized = cast(RawBufferTransfer, Device[buffer.device].buffer).transfer(src.realized, buffer.shape, buffer.dtype, **buffer._device_extra_args())
   else:
-    buffer.realized = Device[buffer.device].buffer.fromCPU(src.toCPU(), **buffer._device_extra_args())
+    # TODO: schedule this as FROM to go to CPU, and a FROM to go to device
+    buffer.realized = Device[buffer.device].buffer.fromCPU(src.realized.toCPU(), **buffer._device_extra_args())
 
 # *** n op LoadOps ***
 
diff --git a/tinygrad/runtime/ops_torch.py b/tinygrad/runtime/ops_torch.py
index 38c380818a..fe5d2f3a05 100644
--- a/tinygrad/runtime/ops_torch.py
+++ b/tinygrad/runtime/ops_torch.py
@@ -31,7 +31,7 @@ class RawTorchBuffer(RawBuffer):
   def __init__(self, size:int, dtype:DType, buf:Optional[torch.Tensor]=None): super().__init__(size, dtype, buf if buf is not None else torch.empty([size], dtype=inverse_type_map[dtype]))
   @classmethod
   def fromCPU(cls, x):
-    buf = torch.from_numpy(x).requires_grad_(False).to(device)
+    buf = torch.from_numpy(x if all(s>=0 for s in x.strides) else x.copy()).requires_grad_(False).to(device)
     return cls(prod(x.shape), type_map[buf.dtype], buf)
   def toCPU(self): return self._buf.cpu().numpy()
 TorchBuffer = Interpreted(RawTorchBuffer, torch_fxn_for_op, from_underlying=lambda x: RawTorchBuffer(prod(x.shape), type_map[x.dtype], x))
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 218089ddfc..863e33e219 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -97,7 +97,7 @@ class Tensor:
     # TODO: this is a hack for writing to DISK
     if self.device.startswith("DISK"):
       if x.__class__ is not Tensor: x = Tensor(x, device="CPU", dtype=self.dtype)
-      self.lazydata.contiguous().realize().realized._copyin(x.numpy()) # type: ignore
+      self.contiguous().realize().lazydata.realized._copyin(x.numpy()) # type: ignore
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)
     assert self.shape == x.shape and self.device == x.device, f"assign shape mismatch {self.shape} != {x.shape} or device mismatch {self.device} != {x.device}"
@@ -107,8 +107,11 @@ class Tensor:
     self.lazydata = x.lazydata
     return self
 
-  def detach(self): return Tensor(self.lazydata, device=self.device, requires_grad=False)
-  def numpy(self) -> np.ndarray: return self.lazydata.toCPU()
+  def detach(self) -> Tensor: return Tensor(self.lazydata, device=self.device, requires_grad=False)
+  def numpy(self) -> np.ndarray:
+    assert all_int(self.shape), f"no numpy if shape is symbolic, {self.shape=}"
+    assert self.dtype.np is not None, f"no numpy dtype for {self.dtype}"
+    return self.detach().cast(dtypes.from_np(self.dtype.np)).contiguous().to('CPU').realize().lazydata.realized._buf.reshape(self.shape)
 
   # TODO: if things are realized this won't work
   def to_(self, device:str):
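
A minimal usage sketch of the new user-facing path, assuming a tinygrad checkout at this commit: Tensor.numpy() now owns the detach/cast/contiguous/to('CPU')/realize pipeline that previously lived in LazyBuffer.toCPU and LazyBuffer.prepare_transfer.

    import numpy as np
    from tinygrad.tensor import Tensor

    # Realization and the device->CPU copy now happen inside
    # Tensor.numpy(), not on the LazyBuffer itself.
    a = np.random.randn(4, 4).astype(np.float32)
    t = Tensor(a)
    out = (t + 1).numpy()  # detach -> cast -> contiguous -> to('CPU') -> realize -> reshape
    np.testing.assert_allclose(out, a + 1, rtol=1e-6)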
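
The ops_torch.py change guards torch.from_numpy against NumPy arrays with negative strides (which can arise from flipped or reverse-strided views); torch rejects those with a ValueError, so the patch falls back to a contiguous copy. A small illustration of the failure mode, assuming torch and numpy are installed:

    import numpy as np
    import torch

    a = np.arange(6, dtype=np.float32)[::-1]  # negative-stride view
    try:
        torch.from_numpy(a)                   # raises: negative strides unsupported
    except ValueError:
        pass
    # The patched fromCPU copies first, so the conversion succeeds:
    t = torch.from_numpy(a if all(s >= 0 for s in a.strides) else a.copy())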