diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b377e14ed3..ed46a00c06 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,9 +21,7 @@ repos:
       pass_filenames: false
     - id: docs
       name: docs
-      entry: |
-        python3 docs/abstractions.py
-        python3 docs/abstractions2.py
+      entry: python3 docs/abstractions.py && python3 docs/abstractions2.py
       language: system
       always_run: true
       pass_filenames: false
diff --git a/docs/abstractions2.py b/docs/abstractions2.py
index ef62986da7..357eb01ccf 100644
--- a/docs/abstractions2.py
+++ b/docs/abstractions2.py
@@ -73,14 +73,16 @@
 assert val == 5
 
 print("******** third, the LazyBuffer ***********")
-from tinygrad.lazy import LazyBuffer
+from tinygrad.lazy import LazyBuffer, LoadOps
 from tinygrad.realize import run_schedule
 
 # allocate some values + load in values
 # TODO: remove numpy here
 import numpy as np
-a = LazyBuffer.fromCPU(np.array([2], np.int32)).copy_to_device(DEVICE)
-b = LazyBuffer.fromCPU(np.array([3], np.int32)).copy_to_device(DEVICE)
+a = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, "CPU")
+b = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, "CPU")
+a.realized = Buffer("CPU", 1, dtypes.int32, np.array([2], np.int32).flatten())
+b.realized = Buffer("CPU", 1, dtypes.int32, np.array([3], np.int32).flatten())
 
 # describe the computation
 out = a.e(BinaryOps.ADD, b)
diff --git a/test/test_lazybuffer.py b/test/test_lazybuffer.py
index cda8b4c467..69951410d6 100644
--- a/test/test_lazybuffer.py
+++ b/test/test_lazybuffer.py
@@ -1,20 +1,14 @@
 #!/usr/bin/env python
 import numpy as np
 import unittest
-from tinygrad.lazy import LazyBuffer
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.device import Interpreted
 
 class TestLazyBuffer(unittest.TestCase):
-  @unittest.skip("it doesn't work like this anymore")
-  def test_fromcpu_buffer_sharing(self):
-    a = np.arange(8)
-    assert LazyBuffer.fromCPU(a).realized._buf is a
-
   def test_fromcpu_shape_tracker(self):
     def helper(a: np.ndarray):
       print(a.shape, a.strides, a.flags.c_contiguous)
-      b = LazyBuffer.fromCPU(a)
+      b = Tensor(a).lazydata
       #assert b.st.contiguous == a.flags.c_contiguous
       assert b.st.shape == a.shape
       np.testing.assert_equal(a, Tensor(b).numpy())
diff --git a/test/test_lazyop.py b/test/test_lazyop.py
index ccd5ad4d41..7e097d3d4a 100644
--- a/test/test_lazyop.py
+++ b/test/test_lazyop.py
@@ -24,7 +24,7 @@ class TestLazyOp(unittest.TestCase):
   def test_selfreferential_speed(self):
     st = time.monotonic()
     for i in range(25):
-      p = LazyBuffer.fromCPU(np.array([1]))
+      p = Tensor([1]).lazydata
       for _ in range(i): p = p.e(BinaryOps.ADD, p)
       # sanity check if caching works this should be way faster
       assert time.monotonic() -st < 0.5, f"{i}"
diff --git a/tinygrad/lazy.py b/tinygrad/lazy.py
index 3e2ab2aa29..4ed30c4a5b 100644
--- a/tinygrad/lazy.py
+++ b/tinygrad/lazy.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 import sys, math
-import numpy as np
 from collections import defaultdict
 from typing import Union, Optional, Any, Tuple, List, Set, Dict, DefaultDict, cast
 from tinygrad.dtype import dtypes, DType, ImageDType
@@ -77,12 +76,6 @@ class LazyBuffer:
 
   def schedule(self, seen=None): return create_schedule([self], seen)
 
-  @staticmethod
-  def fromCPU(x: np.ndarray) -> LazyBuffer:
-    ret = LazyBuffer("CPU", ShapeTracker.from_shape(x.shape), dtypes.from_np(x.dtype), op=LoadOps.EMPTY)
-    ret.realized = Buffer("CPU", x.size, dtypes.from_np(x.dtype), x.flatten())
-    return ret
-
   def copy_to_device(self, device:str) -> LazyBuffer:
     # no COPY
     if self.device == device: return self
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 9ed2db19bd..3263dc6b07 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -41,6 +41,11 @@ def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str,
   if isinstance(device, str): return LazyBuffer.loadop(op, shape, dtype, device, arg, src)
   return MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype, d, arg, src) for d in device], None)
 
+def _fromcpu(x: np.ndarray) -> LazyBuffer:
+  ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "CPU")
+  ret.realized = Buffer("CPU", prod(x.shape), dtypes.from_np(x.dtype), x.flatten())
+  return ret
+
 Scalar = Union[float, int, bool]
 
 class Tensor:
@@ -68,17 +73,17 @@
     self._ctx: Optional[Function] = None
     if isinstance(data, LazyBuffer): assert dtype is None or dtype == data.dtype, "dtype doesn't match, and casting isn't supported"
     elif isinstance(data, get_args(Scalar)): data = _loadop(LoadOps.CONST, tuple(), dtype or dtypes.from_py(data), device, data)
-    elif isinstance(data, bytes): data = LazyBuffer.fromCPU(np.frombuffer(data, np.uint8))
+    elif isinstance(data, bytes): data = _fromcpu(np.frombuffer(data, np.uint8))
     elif data is None: data = _loadop(LoadOps.EMPTY, (0,), dtype or dtypes.default_float, device)
     elif isinstance(data, list):
       if (d := fully_flatten(data)) and all(isinstance(s, bool) for s in d): dtype = dtype or dtypes.bool
       elif d and all_int(d): dtype = dtype or dtypes.default_int
       else: dtype = dtype or dtypes.default_float
       # NOTE: cast at the end for the dtypes that do not have a numpy dtype
-      data = LazyBuffer.fromCPU(np.array(data, dtype.np)).cast(dtype)
+      data = _fromcpu(np.array(data, dtype.np)).cast(dtype)
     elif isinstance(data, np.ndarray):
       if data.shape == (): data = _loadop(LoadOps.CONST, tuple(), dtype or dtypes.from_np(data.dtype), device, data.item())
-      else: data = LazyBuffer.fromCPU(data.astype(dtype.np) if dtype is not None and dtype.np is not None else data)
+      else: data = _fromcpu(data.astype(dtype.np) if dtype is not None and dtype.np is not None else data)
 
     # data is a LazyBuffer, but it might be on the wrong device
     if not isinstance(data, (LazyBuffer, MultiLazyBuffer)): raise RuntimeError(f"can't create Tensor from {data!r} with type {type(data)}")