new style device (#2530)

* cpu tests pass
* torch works
* works
* metal works
* fix ops_disk
* metal jit works
* fix openpilot
* llvm and clang work
* fix webgpu
* docs are rly broken
* LRU works on metal
* delete comment
* revert name to ._buf. LRU only on Compiled
* changes
* allocator
* allocator, getting closer
* lru alloc
* LRUAllocator
* all pass
* metal
* cuda
* test examples
* linearizer
* test fixes
* fix custom + clean realize
* fix hip
* skip tests
* fix tests
* fix size=0
* fix MOCKHIP
* fix thneed
* copy better
* simple
* old style metal copy
* fix thneed
* np reshape
* give cuda a device
2026-04-07 03:00:26 -04:00 · 2023-11-30 17:07:16 -08:00
parent e56511b59a
commit 2c363b5f0b
38 changed files with 572 additions and 1039 deletions
--- a/test/test_uops.py
+++ b/test/test_uops.py
@@ -2,16 +2,16 @@ from typing import Optional, Tuple, Any, List
 import unittest, math
 import numpy as np
 from tinygrad.helpers import dtypes, getenv, DType, PtrDType
-from tinygrad.tensor import Device
+from tinygrad.device import Buffer, Device
 from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps
 from tinygrad.device import CompiledASTRunner, Compiled
 from tinygrad.codegen.linearizer import UOps, UOp

-def _uops_to_prg(uops):
+def _uops_to_prg(uops, bufcount):
  src, runtime_args = Device[Device.DEFAULT].renderer("test", uops)
  return CompiledASTRunner(None, "test", src,
                           [1] if Device[Device.DEFAULT].linearizer_opts.has_local else None, [1] if Device[Device.DEFAULT].linearizer_opts.has_local else None,
-                           runtime_args=runtime_args).build(Device[Device.DEFAULT].compiler, Device[Device.DEFAULT].runtime)
+                           runtime_args=runtime_args, bufcount=bufcount).build(Device[Device.DEFAULT].compiler, Device[Device.DEFAULT].runtime)

 def uop(uops:List[UOp], uop:UOps, dtype:Optional[DType], vin:Tuple[UOp, ...], arg:Any=None) -> UOp:
  uops.append(UOp(uop, dtype, tuple(vin), arg))
@@ -24,9 +24,9 @@ def _test_single_value(vals, op, dtype):
  loads = (uop(uops, UOps.LOAD, dtype, [buf_loads[i], uop(uops, UOps.CONST, dtypes.int32, (), 0)]) for i in range(len(vals)))
  alu = uop(uops, UOps.ALU, dtype, loads, op)
  uop(uops, UOps.STORE, None, (buf_store, uop(uops, UOps.CONST, dtypes.int32, (), 0), alu))
-  buf = Device[Device.DEFAULT].buffer(1, dtype)
-  buf2 = [Device[Device.DEFAULT].buffer.fromCPU(np.array([a], dtype=dtype.np)) for a in vals]
-  prg = _uops_to_prg(uops)
+  buf = Buffer(Device.DEFAULT, 1, dtype)
+  buf2 = [Buffer.fromCPU(Device.DEFAULT, np.array([a], dtype=dtype.np)) for a in vals]
+  prg = _uops_to_prg(uops, 1+len(buf2))
  prg.exec([buf]+buf2)
  return buf.toCPU()[0]

@@ -36,8 +36,8 @@ def _test_single_value_const(vals, op, dtype):
  loads = (uop(uops, UOps.CONST, dtype, [], a) for a in vals)
  alu = uop(uops, UOps.ALU, dtype, loads, op)
  uop(uops, UOps.STORE, None, (buf_store, uop(uops, UOps.CONST, dtypes.int32, (), 0), alu))
-  buf = Device[Device.DEFAULT].buffer(1, dtype)
-  prg = _uops_to_prg(uops)
+  buf = Buffer(Device.DEFAULT, 1, dtype)
+  prg = _uops_to_prg(uops, 1)
  prg.exec([buf])
  return buf.toCPU()[0]