think about removing realize from lazybuffer (#1965)

* remove realize from lazybuffer

* okay fine, back that off

* fix tests maybe

* fix test
George Hotz committed 2023-10-04 07:18:58 -07:00 (committed by GitHub)
parent 2ea1dd3e87
commit d449b3bef1
5 changed files with 14 additions and 17 deletions


@@ -18,7 +18,7 @@ class TestLazyBuffer(unittest.TestCase):
     b = LazyBuffer.fromCPU(a).realize()
     #assert b.st.contiguous == a.flags.c_contiguous
     assert b.st.shape == a.shape
-    np.testing.assert_equal(a, b.toCPU())
+    np.testing.assert_equal(a, Tensor(b).numpy())
     for ndims in range(1, 4):
       a = np.random.randn(*(4,)*ndims).astype(np.float32)
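
The test above no longer calls toCPU() on the LazyBuffer directly; it wraps the realized buffer in a Tensor and reads it back through the Tensor API. A minimal sketch of that round trip, assuming a tinygrad checkout at this commit:

import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.lazy import LazyBuffer

a = np.random.randn(4, 4).astype(np.float32)
b = LazyBuffer.fromCPU(a).realize()             # realize the buffer on the CPU backend
np.testing.assert_equal(a, Tensor(b).numpy())   # host read-back now goes through Tensor.numpy()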


@@ -4,7 +4,7 @@ from typing import Callable, Optional, Tuple, Union, List, Dict, Any, cast, Mapp
 from weakref import ref, WeakSet, WeakValueDictionary
 import numpy as np
-from tinygrad.helpers import prod, getenv, DType, dtypes, flatten, ImageDType, partition, all_int, dedup, merge_dicts
+from tinygrad.helpers import prod, getenv, DType, dtypes, flatten, ImageDType, partition, dedup, merge_dicts
 from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps, ReduceOps, MovementOps, LoadOps, OpType, LazyOp, MemBuffer, ConstBuffer, BufferOps
 from tinygrad.shape.shapetracker import ShapeTracker, get_contraction
 from tinygrad.shape.symbolic import Variable, sint
@@ -223,15 +223,6 @@ class LazyBuffer:
   def fromCPU(x: np.ndarray) -> LazyBuffer:
     return LazyBuffer("CPU", ShapeTracker.from_shape(x.shape), LoadOps, None, dtypes.from_np(x.dtype), {}, RawNumpyBuffer.fromCPU(x))
-  def prepare_transfer(self):
-    self_casted = self.e(UnaryOps.CAST, arg=(dtypes.from_np(self.dtype.np), False)) if dtypes.from_np(self.dtype.np) != self.dtype else self
-    return self_casted.contiguous().realize().realized
-  def toCPU(self) -> np.ndarray:
-    assert self.dtype.np, f"{self.dtype} is not supported in toCPU"
-    assert all_int(self.shape), f"no toCPU if shape is symbolic, {self.shape=}"
-    return cast(RawBuffer, self.prepare_transfer()).toCPU().reshape(self.shape)
   # *** elementwise ops ***
   def e(self:LazyBuffer, op:Union[UnaryOps, BinaryOps, TernaryOps], *srcs:LazyBuffer, arg:Optional[Any]=None) -> LazyBuffer:
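
The removed prepare_transfer()/toCPU() pair was LazyBuffer's only host-facing code: cast to the numpy-compatible dtype if needed, force a contiguous layout, realize, then read the raw buffer back and reshape. A hedged sketch of that removed behaviour as a standalone helper (lazybuffer_to_cpu is a hypothetical name; it only uses calls visible in this diff):

import numpy as np
from tinygrad.helpers import dtypes, all_int
from tinygrad.ops import UnaryOps

def lazybuffer_to_cpu(lb) -> np.ndarray:
  assert lb.dtype.np is not None, f"{lb.dtype} has no numpy equivalent"
  assert all_int(lb.shape), f"no CPU read-back if shape is symbolic, {lb.shape=}"
  np_dtype = dtypes.from_np(lb.dtype.np)
  # cast if the on-device dtype differs from its numpy counterpart, then force contiguity and realize
  casted = lb.e(UnaryOps.CAST, arg=(np_dtype, False)) if np_dtype != lb.dtype else lb
  raw = casted.contiguous().realize().realized    # the RawBuffer backing the realized LazyBuffer
  return raw.toCPU().reshape(lb.shape)

The same cast/contiguous/realize sequence now lives in Tensor.numpy() (see the tensor.py hunk below), so LazyBuffer no longer needs to know how to produce a numpy array.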


@@ -47,16 +47,19 @@ def _realize_contiguous(buffer: LazyBuffer, src: LazyBuffer) -> None:
   assert buffer.dtype == src.dtype, f"contiguous dtype mismatch, expecting {buffer.dtype}, got {src.dtype}"
 def _realize_from(buffer: LazyBuffer, src: LazyBuffer) -> None:
   assert src.realized.size == buffer.st.size(), f"size mismatch on FROM {src.realized.size} != {buffer.st.size()}"
+  assert src.st.contiguous and buffer.st.contiguous, "all must be contiguous for from"
   if DEBUG >= 3: print(f"*** copy {buffer.device} <- {src.device} size {src.realized.size} dtype {src.realized.dtype}")
   # TODO: make this generic
   if isinstance(src.realized, RawDiskBuffer) and issubclass(Device[buffer.device].buffer, RawBufferMapped):
     assert all_int(buffer.shape), "does not support symbolic shape"
     buffer.realized = Device[buffer.device].buffer(prod(buffer.shape), buffer.dtype, **buffer._device_extra_args())
-    src.prepare_transfer().readinto(cast(RawBufferMapped, buffer.realized)._buffer())
+    src.realized.readinto(cast(RawBufferMapped, buffer.realized)._buffer())
   elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and P2P >= 1:
     buffer.realized = cast(RawBufferTransfer, Device[buffer.device].buffer).transfer(src.realized, buffer.shape, buffer.dtype, **buffer._device_extra_args())
   else:
-    buffer.realized = Device[buffer.device].buffer.fromCPU(src.toCPU(), **buffer._device_extra_args())
+    # TODO: schedule this as FROM to go to CPU, and a FROM to go to device
+    buffer.realized = Device[buffer.device].buffer.fromCPU(src.realized.toCPU(), **buffer._device_extra_args())
 # *** n op LoadOps ***
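
With prepare_transfer() gone, _realize_from now expects src to already be realized and contiguous (hence the new assert), and it copies through one of three paths: a disk-to-mapped-buffer readinto, a peer-to-peer transfer when both sides support it, or a bounce through host memory. A hedged sketch of that host-bounce fallback (copy_via_host and its arguments are illustrative names, not tinygrad API):

def copy_via_host(dst_raw_buffer_cls, src_raw_buffer, **device_extra_args):
  host_array = src_raw_buffer.toCPU()                                  # device -> host numpy array
  return dst_raw_buffer_cls.fromCPU(host_array, **device_extra_args)   # host -> destination device

The TODO in the else branch notes the intended follow-up: schedule this as two FROM loads (device to CPU, then CPU to device) instead of bouncing eagerly inside _realize_from.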


@@ -31,7 +31,7 @@ class RawTorchBuffer(RawBuffer):
   def __init__(self, size:int, dtype:DType, buf:Optional[torch.Tensor]=None): super().__init__(size, dtype, buf if buf is not None else torch.empty([size], dtype=inverse_type_map[dtype]))
   @classmethod
   def fromCPU(cls, x):
-    buf = torch.from_numpy(x).requires_grad_(False).to(device)
+    buf = torch.from_numpy(x if all(s>=0 for s in x.strides) else x.copy()).requires_grad_(False).to(device)
     return cls(prod(x.shape), type_map[buf.dtype], buf)
   def toCPU(self): return self._buf.cpu().numpy()
 TorchBuffer = Interpreted(RawTorchBuffer, torch_fxn_for_op, from_underlying=lambda x: RawTorchBuffer(prod(x.shape), type_map[x.dtype], x))
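
The fromCPU change guards against numpy arrays with negative strides (views produced by operations like np.flip), which torch.from_numpy rejects; such arrays are copied into a fresh contiguous buffer first. A standalone sketch of the behaviour, assuming numpy and torch are installed:

import numpy as np
import torch

a = np.flip(np.arange(6, dtype=np.float32).reshape(2, 3), axis=1)  # a view with a negative stride
# torch.from_numpy(a) would fail here, so copy when any stride is negative, as the guard above does
safe = a if all(s >= 0 for s in a.strides) else a.copy()
t = torch.from_numpy(safe).requires_grad_(False)
print(t.shape, t.stride())  # torch.Size([2, 3]) (3, 1)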


@@ -97,7 +97,7 @@ class Tensor:
     # TODO: this is a hack for writing to DISK
     if self.device.startswith("DISK"):
       if x.__class__ is not Tensor: x = Tensor(x, device="CPU", dtype=self.dtype)
-      self.lazydata.contiguous().realize().realized._copyin(x.numpy()) # type: ignore
+      self.contiguous().realize().lazydata.realized._copyin(x.numpy()) # type: ignore
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)
     assert self.shape == x.shape and self.device == x.device, f"assign shape mismatch {self.shape} != {x.shape} or device mismatch {self.device} != {x.device}"
@@ -107,8 +107,11 @@ class Tensor:
     self.lazydata = x.lazydata
     return self
-  def detach(self): return Tensor(self.lazydata, device=self.device, requires_grad=False)
-  def numpy(self) -> np.ndarray: return self.lazydata.toCPU()
+  def detach(self) -> Tensor: return Tensor(self.lazydata, device=self.device, requires_grad=False)
+  def numpy(self) -> np.ndarray:
+    assert all_int(self.shape), f"no numpy if shape is symbolic, {self.shape=}"
+    assert self.dtype.np is not None, f"no numpy dtype for {self.dtype}"
+    return self.detach().cast(dtypes.from_np(self.dtype.np)).contiguous().to('CPU').realize().lazydata.realized._buf.reshape(self.shape)
   # TODO: if things are realized this won't work
   def to_(self, device:str):
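
Tensor.numpy() now owns the whole host read-back path: detach, cast to the numpy-compatible dtype, force a contiguous layout, move to the CPU device, realize, then reshape the realized buffer's underlying array, with the symbolic-shape and dtype asserts moved here from the old LazyBuffer.toCPU(). A hedged usage sketch, assuming a tinygrad checkout at this commit and the default device:

import numpy as np
from tinygrad.tensor import Tensor

t = Tensor.ones(3, 3) * 2          # still lazy at this point
a = t.numpy()                      # detach -> cast -> contiguous -> to('CPU') -> realize -> reshape
assert isinstance(a, np.ndarray) and a.shape == (3, 3)
np.testing.assert_equal(a, np.full((3, 3), 2.0, dtype=np.float32))

The DISK branch of assign() above makes the same move: contiguous() and realize() are called on the Tensor rather than on its lazydata before the raw _copyin.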