new style device (#2530)

* cpu tests pass * torch works * works * metal works * fix ops_disk * metal jit works * fix openpilot * llvm and clang work * fix webgpu * docs are rly broken * LRU works on metal * delete comment * revert name to ._buf. LRU only on Compiled * changes * allocator * allocator, getting closer * lru alloc * LRUAllocator * all pass * metal * cuda * test examples * linearizer * test fixes * fix custom + clean realize * fix hip * skip tests * fix tests * fix size=0 * fix MOCKHIP * fix thneed * copy better * simple * old style metal copy * fix thneed * np reshape * give cuda a device
2026-04-29 03:00:14 -04:00 · 2023-11-30 17:07:16 -08:00
parent e56511b59a
commit 2c363b5f0b
38 changed files with 572 additions and 1039 deletions
--- a/test/test_linearizer.py
+++ b/test/test_linearizer.py
@@ -3,7 +3,7 @@ import unittest, os

 from tinygrad.codegen.kernel import Opt, OptOps, tensor_cores
 from tinygrad.codegen.linearizer import Linearizer, UOp, UOps
-from tinygrad.device import Compiled, Device
+from tinygrad.device import Compiled, Device, Buffer
 from tinygrad.ops import BufferOps, MemBuffer, ConstBuffer, LazyOp, LoadOps, TernaryOps
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
@@ -140,7 +140,7 @@ def helper_realized_ast(r:Tensor):
  s = r.lazydata.schedule()
  run_schedule(s[:-1])  # run all kernels except the last one
  # now all input LazyBuffers buffers in s[-1] should be realized
-  output_buffer = Device[s[-1].out.device].buffer(prod((s if isinstance(s, int) else s.max for s in s[-1].out.shape)), s[-1].out.dtype, **s[-1].out._device_extra_args())  # allocate an output buffer
+  output_buffer = Buffer(s[-1].out.device, prod((s if isinstance(s, int) else s.max for s in s[-1].out.shape)), s[-1].out.dtype, **s[-1].out._device_extra_args())  # allocate an output buffer
  return s[-1].ast, [output_buffer] + [l.realized for l in s[-1].inputs]

 class TestFloat4(unittest.TestCase):
@@ -367,7 +367,7 @@ def helper_linearizer_opt(r:Tensor, opts=[], apply_tc=False):
      for opt in opts:
        k.apply_opt(opt)
    prg = to_prg(k)
-    real_bufs[0] = real_bufs[0].fromCPU(np.zeros((real_bufs[0].size, ), dtype=real_bufs[0].dtype.np)) # Zero to check that all values are filled
+    real_bufs[0].copyin(np.zeros((real_bufs[0].size, ), dtype=real_bufs[0].dtype.np).data) # Zero to check that all values are filled
    prg.exec(real_bufs)
    np.testing.assert_allclose(wanna_output, real_bufs[0].toCPU(), atol=1e-4, rtol=1e-4)

@@ -381,7 +381,7 @@ def helper_linearizer_opt(r:Tensor, opts=[], apply_tc=False):
  k = Linearizer(realized_ast)
  k.hand_coded_optimizations()
  prg = Device[Device.DEFAULT].to_program(k)
-  real_bufs[0] = real_bufs[0].fromCPU(np.zeros((real_bufs[0].size, ), dtype=real_bufs[0].dtype.np)) # Zero to check that all values are filled
+  real_bufs[0].copyin(np.zeros((real_bufs[0].size, ), dtype=real_bufs[0].dtype.np).data) # Zero to check that all values are filled
  prg.exec(real_bufs)
  np.testing.assert_allclose(wanna_output, real_bufs[0].toCPU(), atol=1e-4, rtol=1e-4)
  for x in opts: # Check custom transformations if any.