hcq: move cpu to hcq (#11262)

* hcq: move cpu to hcq

* import time

* upd

* fix

* windows support

* hm

* cleaner

* fix timer

* fix timing

* std is ns

* skip profiler

* mypy

* cleaner

* cleanups

* after merge

* default is back
This commit is contained in:
nimlgen
2025-07-21 15:10:38 +03:00
committed by GitHub
parent 816c01c2d4
commit cc3c1e4c14
11 changed files with 131 additions and 108 deletions

View File

@@ -7,28 +7,30 @@
print("******** first, the runtime ***********")
from tinygrad.runtime.ops_cpu import ClangJITCompiler, MallocAllocator, CPUProgram
from tinygrad.runtime.ops_cpu import ClangJITCompiler, CPUDevice, CPUProgram
cpu = CPUDevice()
# allocate some buffers
out = MallocAllocator.alloc(4)
a = MallocAllocator.alloc(4)
b = MallocAllocator.alloc(4)
out = cpu.allocator.alloc(4)
a = cpu.allocator.alloc(4)
b = cpu.allocator.alloc(4)
# load in some values (little endian)
MallocAllocator._copyin(a, memoryview(bytearray([2,0,0,0])))
MallocAllocator._copyin(b, memoryview(bytearray([3,0,0,0])))
cpu.allocator._copyin(a, memoryview(bytearray([2,0,0,0])))
cpu.allocator._copyin(b, memoryview(bytearray([3,0,0,0])))
# compile a program to a binary
lib = ClangJITCompiler().compile("void add(int *out, int *a, int *b) { out[0] = a[0] + b[0]; }")
# create a runtime for the program
fxn = CPUProgram("add", lib)
fxn = cpu.runtime("add", lib)
# run the program
fxn(out, a, b)
# check the data out
print(val := MallocAllocator._as_buffer(out).cast("I").tolist()[0])
print(val := cpu.allocator._as_buffer(out).cast("I").tolist()[0])
assert val == 5
@@ -46,7 +48,7 @@ from tinygrad.shape.shapetracker import ShapeTracker
out = Buffer(DEVICE, 1, dtypes.int32).allocate()
a = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))
b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))
# NOTE: a._buf is the same as the return from MallocAllocator.alloc
# NOTE: a._buf is the same as the return from cpu.allocator.alloc
# describe the computation
buf_1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 1)