move fromcpu out of lazy.py (#3122)

* move fromcpu out of lazy.py
* fix abstractions2
2026-01-08 22:48:25 -05:00 · 2024-01-14 18:21:08 -08:00
parent 96345061d3
commit ea5824657d
6 changed files with 16 additions and 24 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,9 +21,7 @@ repos:
        pass_filenames: false
      - id: docs
        name: docs
-        entry: |
-          python3 docs/abstractions.py
-          python3 docs/abstractions2.py
+        entry: python3 docs/abstractions.py && python3 docs/abstractions2.py
        language: system
        always_run: true
        pass_filenames: false
--- a/docs/abstractions2.py
+++ b/docs/abstractions2.py
@@ -73,14 +73,16 @@ assert val == 5

 print("******** third, the LazyBuffer ***********")

-from tinygrad.lazy import LazyBuffer
+from tinygrad.lazy import LazyBuffer, LoadOps
 from tinygrad.realize import run_schedule

 # allocate some values + load in values
 # TODO: remove numpy here
 import numpy as np
-a = LazyBuffer.fromCPU(np.array([2], np.int32)).copy_to_device(DEVICE)
-b = LazyBuffer.fromCPU(np.array([3], np.int32)).copy_to_device(DEVICE)
+a = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, "CPU")
+b = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, "CPU")
+a.realized = Buffer("CPU", 1, dtypes.int32, np.array([2], np.int32).flatten())
+b.realized = Buffer("CPU", 1, dtypes.int32, np.array([3], np.int32).flatten())

 # describe the computation
 out = a.e(BinaryOps.ADD, b)
--- a/test/test_lazybuffer.py
+++ b/test/test_lazybuffer.py
@@ -1,20 +1,14 @@
 #!/usr/bin/env python
 import numpy as np
 import unittest
-from tinygrad.lazy import LazyBuffer
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.device import Interpreted

 class TestLazyBuffer(unittest.TestCase):
-  @unittest.skip("it doesn't work like this anymore")
-  def test_fromcpu_buffer_sharing(self):
-    a = np.arange(8)
-    assert LazyBuffer.fromCPU(a).realized._buf is a
-
  def test_fromcpu_shape_tracker(self):
    def helper(a: np.ndarray):
      print(a.shape, a.strides, a.flags.c_contiguous)
-      b = LazyBuffer.fromCPU(a)
+      b = Tensor(a).lazydata
      #assert b.st.contiguous == a.flags.c_contiguous
      assert b.st.shape == a.shape
      np.testing.assert_equal(a, Tensor(b).numpy())
--- a/test/test_lazyop.py
+++ b/test/test_lazyop.py
@@ -24,7 +24,7 @@ class TestLazyOp(unittest.TestCase):
  def test_selfreferential_speed(self):
    st = time.monotonic()
    for i in range(25):
-      p = LazyBuffer.fromCPU(np.array([1]))
+      p = Tensor([1]).lazydata
      for _ in range(i): p = p.e(BinaryOps.ADD, p)
      # sanity check if caching works this should be way faster
      assert time.monotonic() -st < 0.5, f"{i}"
--- a/tinygrad/lazy.py
+++ b/tinygrad/lazy.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 import sys, math
-import numpy as np
 from collections import defaultdict
 from typing import Union, Optional, Any, Tuple, List, Set, Dict, DefaultDict, cast
 from tinygrad.dtype import dtypes, DType, ImageDType
@@ -77,12 +76,6 @@ class LazyBuffer:

  def schedule(self, seen=None): return create_schedule([self], seen)

-  @staticmethod
-  def fromCPU(x: np.ndarray) -> LazyBuffer:
-    ret = LazyBuffer("CPU", ShapeTracker.from_shape(x.shape), dtypes.from_np(x.dtype), op=LoadOps.EMPTY)
-    ret.realized = Buffer("CPU", x.size, dtypes.from_np(x.dtype), x.flatten())
-    return ret
-
  def copy_to_device(self, device:str) -> LazyBuffer:
    # no COPY
    if self.device == device: return self
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -41,6 +41,11 @@ def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str,
  if isinstance(device, str): return LazyBuffer.loadop(op, shape, dtype, device, arg, src)
  return MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype, d, arg, src) for d in device], None)

+def _fromcpu(x: np.ndarray) -> LazyBuffer:
+  ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "CPU")
+  ret.realized = Buffer("CPU", prod(x.shape), dtypes.from_np(x.dtype), x.flatten())
+  return ret
+
 Scalar = Union[float, int, bool]

 class Tensor:
@@ -68,17 +73,17 @@ class Tensor:
    self._ctx: Optional[Function] = None
    if isinstance(data, LazyBuffer): assert dtype is None or dtype == data.dtype, "dtype doesn't match, and casting isn't supported"
    elif isinstance(data, get_args(Scalar)): data = _loadop(LoadOps.CONST, tuple(), dtype or dtypes.from_py(data), device, data)
-    elif isinstance(data, bytes): data = LazyBuffer.fromCPU(np.frombuffer(data, np.uint8))
+    elif isinstance(data, bytes): data = _fromcpu(np.frombuffer(data, np.uint8))
    elif data is None: data = _loadop(LoadOps.EMPTY, (0,), dtype or dtypes.default_float, device)
    elif isinstance(data, list):
      if (d := fully_flatten(data)) and all(isinstance(s, bool) for s in d): dtype = dtype or dtypes.bool
      elif d and all_int(d): dtype = dtype or dtypes.default_int
      else: dtype = dtype or dtypes.default_float
      # NOTE: cast at the end for the dtypes that do not have a numpy dtype
-      data = LazyBuffer.fromCPU(np.array(data, dtype.np)).cast(dtype)
+      data = _fromcpu(np.array(data, dtype.np)).cast(dtype)
    elif isinstance(data, np.ndarray):
      if data.shape == (): data = _loadop(LoadOps.CONST, tuple(), dtype or dtypes.from_np(data.dtype), device, data.item())
-      else: data = LazyBuffer.fromCPU(data.astype(dtype.np) if dtype is not None and dtype.np is not None else data)
+      else: data = _fromcpu(data.astype(dtype.np) if dtype is not None and dtype.np is not None else data)

    # data is a LazyBuffer, but it might be on the wrong device
    if not isinstance(data, (LazyBuffer, MultiLazyBuffer)): raise RuntimeError(f"can't create Tensor from {data!r} with type {type(data)}")