new style device (#2530)

* cpu tests pass

* torch works

* works

* metal works

* fix ops_disk

* metal jit works

* fix openpilot

* llvm and clang work

* fix webgpu

* docs are really broken

* LRU works on metal

* delete comment

* revert name to ._buf. LRU only on Compiled

* changes

* allocator

* allocator, getting closer

* lru alloc

* LRUAllocator

* all pass

* metal

* cuda

* test examples

* linearizer

* test fixes

* fix custom + clean realize

* fix hip

* skip tests

* fix tests

* fix size=0

* fix MOCKHIP

* fix thneed

* copy better

* simple

* old style metal copy

* fix thneed

* np reshape

* give cuda a device
This commit is contained in:
George Hotz
2023-11-30 17:07:16 -08:00
committed by GitHub
parent e56511b59a
commit 2c363b5f0b
38 changed files with 572 additions and 1039 deletions

View File

@@ -85,7 +85,6 @@ def schedule_to_thneed(schedule, output_fn):
def thneed_test_onnx(onnx_data, output_fn):
import onnx
import pyopencl as cl
from tinygrad.runtime.ops_gpu import CL
import numpy as np
from extra.thneed import Thneed
onnx_model = onnx.load(io.BytesIO(onnx_data))
@@ -118,11 +117,11 @@ def thneed_test_onnx(onnx_data, output_fn):
# inputs
for k,v in nt.inputs.items():
cl.enqueue_copy(CL.cl_queue[0], v, new_np_inputs[k], is_blocking=True)
cl.enqueue_copy(Device["GPU"].queue, v, new_np_inputs[k], is_blocking=True)
nt.run()
new_thneed_out = np.empty((nt.outputs[0].size//4,), dtype=np.float32).reshape(new_torch_out.shape)
cl.enqueue_copy(CL.cl_queue[0], new_thneed_out, nt.outputs[0], is_blocking=True)
cl.enqueue_copy(Device["GPU"].queue, new_thneed_out, nt.outputs[0], is_blocking=True)
# compare torch to thneed
np.testing.assert_allclose(new_torch_out, new_thneed_out, atol=1e-4, rtol=1e-2)