move fromcpu out of lazy.py (#3122)

* move fromcpu out of lazy.py

* fix abstractions2
Author: George Hotz (committed by GitHub)
Date: 2024-01-14 18:21:08 -08:00
Parent: 96345061d3
Commit: ea5824657d
6 changed files with 16 additions and 24 deletions

.pre-commit-config.yaml

@@ -21,9 +21,7 @@ repos:
     pass_filenames: false
   - id: docs
     name: docs
-    entry: |
-      python3 docs/abstractions.py
-      python3 docs/abstractions2.py
+    entry: python3 docs/abstractions.py && python3 docs/abstractions2.py
     language: system
     always_run: true
     pass_filenames: false

docs/abstractions2.py

@@ -73,14 +73,16 @@ assert val == 5
print("******** third, the LazyBuffer ***********") print("******** third, the LazyBuffer ***********")
from tinygrad.lazy import LazyBuffer from tinygrad.lazy import LazyBuffer, LoadOps
from tinygrad.realize import run_schedule from tinygrad.realize import run_schedule
# allocate some values + load in values # allocate some values + load in values
# TODO: remove numpy here # TODO: remove numpy here
import numpy as np import numpy as np
a = LazyBuffer.fromCPU(np.array([2], np.int32)).copy_to_device(DEVICE) a = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, "CPU")
b = LazyBuffer.fromCPU(np.array([3], np.int32)).copy_to_device(DEVICE) b = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, "CPU")
a.realized = Buffer("CPU", 1, dtypes.int32, np.array([2], np.int32).flatten())
b.realized = Buffer("CPU", 1, dtypes.int32, np.array([3], np.int32).flatten())
# describe the computation # describe the computation
out = a.e(BinaryOps.ADD, b) out = a.e(BinaryOps.ADD, b)
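For reference, a minimal sketch of the pattern the updated docs now use in place of LazyBuffer.fromCPU, built only from calls that appear in this diff; the helper name lazybuffer_from_numpy is made up here, and the import paths for Buffer and BinaryOps are assumptions:

import numpy as np
from tinygrad import dtypes
from tinygrad.device import Buffer            # assumed import path at this commit
from tinygrad.lazy import LazyBuffer, LoadOps
from tinygrad.ops import BinaryOps            # assumed import path at this commit

def lazybuffer_from_numpy(x: np.ndarray) -> LazyBuffer:
  # hypothetical helper: an EMPTY loadop on CPU, then attach a realized Buffer holding the data
  lb = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "CPU")
  lb.realized = Buffer("CPU", x.size, dtypes.from_np(x.dtype), x.flatten())
  return lb

a = lazybuffer_from_numpy(np.array([2], np.int32))
b = lazybuffer_from_numpy(np.array([3], np.int32))
out = a.e(BinaryOps.ADD, b)  # describe the computation, as in the docs snippet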

test/test_lazybuffer.py

@@ -1,20 +1,14 @@
 #!/usr/bin/env python
 import numpy as np
 import unittest
-from tinygrad.lazy import LazyBuffer
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.device import Interpreted
 class TestLazyBuffer(unittest.TestCase):
-  @unittest.skip("it doesn't work like this anymore")
-  def test_fromcpu_buffer_sharing(self):
-    a = np.arange(8)
-    assert LazyBuffer.fromCPU(a).realized._buf is a
   def test_fromcpu_shape_tracker(self):
     def helper(a: np.ndarray):
       print(a.shape, a.strides, a.flags.c_contiguous)
-      b = LazyBuffer.fromCPU(a)
+      b = Tensor(a).lazydata
       #assert b.st.contiguous == a.flags.c_contiguous
       assert b.st.shape == a.shape
       np.testing.assert_equal(a, Tensor(b).numpy())

test/test_lazyop.py

@@ -24,7 +24,7 @@ class TestLazyOp(unittest.TestCase):
   def test_selfreferential_speed(self):
     st = time.monotonic()
     for i in range(25):
-      p = LazyBuffer.fromCPU(np.array([1]))
+      p = Tensor([1]).lazydata
       for _ in range(i): p = p.e(BinaryOps.ADD, p)
       # sanity check if caching works this should be way faster
       assert time.monotonic() -st < 0.5, f"{i}"
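Both test changes above apply the same migration: callers that built a LazyBuffer straight from numpy now go through Tensor instead. A rough before/after sketch:

import numpy as np
from tinygrad import Tensor

a = np.arange(8)
# before (removed in this commit): lb = LazyBuffer.fromCPU(a)
lb = Tensor(a).lazydata  # after: wrap in a Tensor and take its lazydata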

tinygrad/lazy.py

@@ -1,6 +1,5 @@
 from __future__ import annotations
 import sys, math
-import numpy as np
 from collections import defaultdict
 from typing import Union, Optional, Any, Tuple, List, Set, Dict, DefaultDict, cast
 from tinygrad.dtype import dtypes, DType, ImageDType
@@ -77,12 +76,6 @@ class LazyBuffer:
   def schedule(self, seen=None): return create_schedule([self], seen)
-  @staticmethod
-  def fromCPU(x: np.ndarray) -> LazyBuffer:
-    ret = LazyBuffer("CPU", ShapeTracker.from_shape(x.shape), dtypes.from_np(x.dtype), op=LoadOps.EMPTY)
-    ret.realized = Buffer("CPU", x.size, dtypes.from_np(x.dtype), x.flatten())
-    return ret
   def copy_to_device(self, device:str) -> LazyBuffer:
     # no COPY
     if self.device == device: return self

tinygrad/tensor.py

@@ -41,6 +41,11 @@ def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str,
   if isinstance(device, str): return LazyBuffer.loadop(op, shape, dtype, device, arg, src)
   return MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype, d, arg, src) for d in device], None)
+def _fromcpu(x: np.ndarray) -> LazyBuffer:
+  ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "CPU")
+  ret.realized = Buffer("CPU", prod(x.shape), dtypes.from_np(x.dtype), x.flatten())
+  return ret
 Scalar = Union[float, int, bool]
 class Tensor:
@@ -68,17 +73,17 @@ class Tensor:
     self._ctx: Optional[Function] = None
     if isinstance(data, LazyBuffer): assert dtype is None or dtype == data.dtype, "dtype doesn't match, and casting isn't supported"
     elif isinstance(data, get_args(Scalar)): data = _loadop(LoadOps.CONST, tuple(), dtype or dtypes.from_py(data), device, data)
-    elif isinstance(data, bytes): data = LazyBuffer.fromCPU(np.frombuffer(data, np.uint8))
+    elif isinstance(data, bytes): data = _fromcpu(np.frombuffer(data, np.uint8))
     elif data is None: data = _loadop(LoadOps.EMPTY, (0,), dtype or dtypes.default_float, device)
     elif isinstance(data, list):
       if (d := fully_flatten(data)) and all(isinstance(s, bool) for s in d): dtype = dtype or dtypes.bool
       elif d and all_int(d): dtype = dtype or dtypes.default_int
       else: dtype = dtype or dtypes.default_float
       # NOTE: cast at the end for the dtypes that do not have a numpy dtype
-      data = LazyBuffer.fromCPU(np.array(data, dtype.np)).cast(dtype)
+      data = _fromcpu(np.array(data, dtype.np)).cast(dtype)
     elif isinstance(data, np.ndarray):
       if data.shape == (): data = _loadop(LoadOps.CONST, tuple(), dtype or dtypes.from_np(data.dtype), device, data.item())
-      else: data = LazyBuffer.fromCPU(data.astype(dtype.np) if dtype is not None and dtype.np is not None else data)
+      else: data = _fromcpu(data.astype(dtype.np) if dtype is not None and dtype.np is not None else data)
     # data is a LazyBuffer, but it might be on the wrong device
     if not isinstance(data, (LazyBuffer, MultiLazyBuffer)): raise RuntimeError(f"can't create Tensor from {data!r} with type {type(data)}")
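With _fromcpu now living in tensor.py, the bytes, list, and ndarray branches of the constructor above all route through it. A quick usage sketch, assuming this commit's public Tensor API:

import numpy as np
from tinygrad import Tensor

t_bytes = Tensor(b"\x01\x02\x03")               # bytes   -> _fromcpu(np.frombuffer(..., np.uint8))
t_list  = Tensor([1.0, 2.0, 3.0])               # list    -> _fromcpu(np.array(...)).cast(dtype)
t_array = Tensor(np.arange(6, dtype=np.int32))  # ndarray -> _fromcpu(...)
print(t_array.numpy())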