Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-04-29 03:00:14 -04:00)
think about removing realize from lazybuffer (#1965)
* remove realize from lazybuffer
* okay fine, back that off
* fix tests maybe
* fix test
@@ -18,7 +18,7 @@ class TestLazyBuffer(unittest.TestCase):
       b = LazyBuffer.fromCPU(a).realize()
       #assert b.st.contiguous == a.flags.c_contiguous
       assert b.st.shape == a.shape
-      np.testing.assert_equal(a, b.toCPU())
+      np.testing.assert_equal(a, Tensor(b).numpy())

     for ndims in range(1, 4):
       a = np.random.randn(*(4,)*ndims).astype(np.float32)
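Note: a standalone sketch of what the updated test exercises (module paths assumed from this era of the repo). With toCPU() gone from LazyBuffer, readback goes through a Tensor wrapper instead.

    import numpy as np
    from tinygrad.lazy import LazyBuffer
    from tinygrad.tensor import Tensor

    a = np.random.randn(4, 4).astype(np.float32)
    b = LazyBuffer.fromCPU(a).realize()            # realize() stays on LazyBuffer
    np.testing.assert_equal(a, Tensor(b).numpy())  # toCPU() is replaced by Tensor(...).numpy()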
@@ -4,7 +4,7 @@ from typing import Callable, Optional, Tuple, Union, List, Dict, Any, cast, Mapp
 from weakref import ref, WeakSet, WeakValueDictionary

 import numpy as np
-from tinygrad.helpers import prod, getenv, DType, dtypes, flatten, ImageDType, partition, all_int, dedup, merge_dicts
+from tinygrad.helpers import prod, getenv, DType, dtypes, flatten, ImageDType, partition, dedup, merge_dicts
 from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps, ReduceOps, MovementOps, LoadOps, OpType, LazyOp, MemBuffer, ConstBuffer, BufferOps
 from tinygrad.shape.shapetracker import ShapeTracker, get_contraction
 from tinygrad.shape.symbolic import Variable, sint
@@ -223,15 +223,6 @@ class LazyBuffer:
   def fromCPU(x: np.ndarray) -> LazyBuffer:
     return LazyBuffer("CPU", ShapeTracker.from_shape(x.shape), LoadOps, None, dtypes.from_np(x.dtype), {}, RawNumpyBuffer.fromCPU(x))

-  def prepare_transfer(self):
-    self_casted = self.e(UnaryOps.CAST, arg=(dtypes.from_np(self.dtype.np), False)) if dtypes.from_np(self.dtype.np) != self.dtype else self
-    return self_casted.contiguous().realize().realized
-
-  def toCPU(self) -> np.ndarray:
-    assert self.dtype.np, f"{self.dtype} is not supported in toCPU"
-    assert all_int(self.shape), f"no toCPU if shape is symbolic, {self.shape=}"
-    return cast(RawBuffer, self.prepare_transfer()).toCPU().reshape(self.shape)
-
   # *** elementwise ops ***

   def e(self:LazyBuffer, op:Union[UnaryOps, BinaryOps, TernaryOps], *srcs:LazyBuffer, arg:Optional[Any]=None) -> LazyBuffer:
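Note: the deleted prepare_transfer() used dtypes.from_np to cast to a numpy-native dtype before the copy; that job now lives in the reworked Tensor.numpy() (see the Tensor hunk at the end). A minimal sketch of the mapping, assuming dtypes is exposed from tinygrad.helpers as in the import hunk above:

    import numpy as np
    from tinygrad.helpers import dtypes

    # from_np maps a numpy dtype to the matching tinygrad DType
    assert dtypes.from_np(np.dtype(np.float32)) == dtypes.float32
    assert dtypes.from_np(np.dtype(np.int32)) == dtypes.int32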
@@ -47,16 +47,19 @@ def _realize_contiguous(buffer: LazyBuffer, src: LazyBuffer) -> None:
   assert buffer.dtype == src.dtype, f"contiguous dtype mismatch, expecting {buffer.dtype}, got {src.dtype}"

 def _realize_from(buffer: LazyBuffer, src: LazyBuffer) -> None:
   assert src.realized.size == buffer.st.size(), f"size mismatch on FROM {src.realized.size} != {buffer.st.size()}"
   assert src.st.contiguous and buffer.st.contiguous, "all must be contiguous for from"
   if DEBUG >= 3: print(f"*** copy {buffer.device} <- {src.device} size {src.realized.size} dtype {src.realized.dtype}")
   # TODO: make this generic
   if isinstance(src.realized, RawDiskBuffer) and issubclass(Device[buffer.device].buffer, RawBufferMapped):
     assert all_int(buffer.shape), "does not support symbolic shape"
     buffer.realized = Device[buffer.device].buffer(prod(buffer.shape), buffer.dtype, **buffer._device_extra_args())
-    src.prepare_transfer().readinto(cast(RawBufferMapped, buffer.realized)._buffer())
+    src.realized.readinto(cast(RawBufferMapped, buffer.realized)._buffer())
   elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and P2P >= 1:
     buffer.realized = cast(RawBufferTransfer, Device[buffer.device].buffer).transfer(src.realized, buffer.shape, buffer.dtype, **buffer._device_extra_args())
   else:
-    buffer.realized = Device[buffer.device].buffer.fromCPU(src.toCPU(), **buffer._device_extra_args())
+    # TODO: schedule this as FROM to go to CPU, and a FROM to go to device
+    buffer.realized = Device[buffer.device].buffer.fromCPU(src.realized.toCPU(), **buffer._device_extra_args())

 # *** n op LoadOps ***
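Note: the DISK fast path relies on readinto(), writing straight into the destination's mapped memory instead of materializing an intermediate numpy array. A minimal sketch of the same idea with plain Python file I/O (the path is hypothetical; this is not tinygrad's RawDiskBuffer):

    import numpy as np

    dst = np.empty(4, dtype=np.float32)
    with open("/tmp/weights.bin", "wb") as f:       # hypothetical scratch file
      f.write(np.arange(4, dtype=np.float32).tobytes())
    with open("/tmp/weights.bin", "rb") as f:
      f.readinto(memoryview(dst).cast("B"))         # fill dst in place, no temporary copy
    print(dst)                                      # [0. 1. 2. 3.]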
@@ -31,7 +31,7 @@ class RawTorchBuffer(RawBuffer):
   def __init__(self, size:int, dtype:DType, buf:Optional[torch.Tensor]=None): super().__init__(size, dtype, buf if buf is not None else torch.empty([size], dtype=inverse_type_map[dtype]))
   @classmethod
   def fromCPU(cls, x):
-    buf = torch.from_numpy(x).requires_grad_(False).to(device)
+    buf = torch.from_numpy(x if all(s>=0 for s in x.strides) else x.copy()).requires_grad_(False).to(device)
     return cls(prod(x.shape), type_map[buf.dtype], buf)
   def toCPU(self): return self._buf.cpu().numpy()
 TorchBuffer = Interpreted(RawTorchBuffer, torch_fxn_for_op, from_underlying=lambda x: RawTorchBuffer(prod(x.shape), type_map[x.dtype], x))
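Note: the copy guard exists because torch.from_numpy() rejects arrays with negative strides (e.g. reversed views) with a ValueError. A small sketch of the guard in isolation:

    import numpy as np
    import torch

    x = np.arange(6, dtype=np.float32)[::-1]         # reversed view, strides == (-4,)
    safe = x if all(s >= 0 for s in x.strides) else x.copy()
    t = torch.from_numpy(safe)                       # from_numpy(x) itself would raise ValueError
    print(t)                                         # tensor([5., 4., 3., 2., 1., 0.])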
@@ -97,7 +97,7 @@ class Tensor:
     # TODO: this is a hack for writing to DISK
     if self.device.startswith("DISK"):
       if x.__class__ is not Tensor: x = Tensor(x, device="CPU", dtype=self.dtype)
-      self.lazydata.contiguous().realize().realized._copyin(x.numpy()) # type: ignore
+      self.contiguous().realize().lazydata.realized._copyin(x.numpy()) # type: ignore
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)
     assert self.shape == x.shape and self.device == x.device, f"assign shape mismatch {self.shape} != {x.shape} or device mismatch {self.device} != {x.device}"
@@ -107,8 +107,11 @@ class Tensor:
     self.lazydata = x.lazydata
     return self

-  def detach(self): return Tensor(self.lazydata, device=self.device, requires_grad=False)
-  def numpy(self) -> np.ndarray: return self.lazydata.toCPU()
+  def detach(self) -> Tensor: return Tensor(self.lazydata, device=self.device, requires_grad=False)
+  def numpy(self) -> np.ndarray:
+    assert all_int(self.shape), f"no numpy if shape is symbolic, {self.shape=}"
+    assert self.dtype.np is not None, f"no numpy dtype for {self.dtype}"
+    return self.detach().cast(dtypes.from_np(self.dtype.np)).contiguous().to('CPU').realize().lazydata.realized._buf.reshape(self.shape)

   # TODO: if things are realized this won't work
   def to_(self, device:str):
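Note: a usage sketch of the reworked Tensor.numpy() (module path assumed from this era of the repo): the data is detached, cast to its numpy-backed dtype, made contiguous, moved to CPU, realized, and reshaped; symbolic shapes and dtypes without a numpy equivalent now fail the new asserts.

    import numpy as np
    from tinygrad.tensor import Tensor

    t = Tensor([[1.0, 2.0], [3.0, 4.0]])
    n = t.numpy()
    assert isinstance(n, np.ndarray) and n.shape == (2, 2)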