move all to compile api (#2203)

* move metal+clang to compile api
* all to the new style
* remove binary arg
* fix triton
* fixup tests
* fix clang
* diskcache is generic
* __wrapped__
* compile_gpu
* fix thneed
* keep the src in the ASTRunner
* lib
* move compile_gpu
* compile_gpu in device
* put compiler in astrunner
* test reverts
* triton compiler
* ugh, that too
2026-01-10 07:28:15 -05:00 · 2023-11-01 23:01:32 -07:00
parent 8932816816
commit 03cf0afa4f
18 changed files with 128 additions and 136 deletions
--- a/docs/abstractions.py
+++ b/docs/abstractions.py
@@ -217,7 +217,7 @@ from tinygrad.runtime.lib import RawMallocBuffer
 # ClangProgram is the simplest runtime (in tinygrad/runtime/ops_clang.py, code 7/10)
 # __init__ calls clang, and __call__ calls the function in the *.so outputted by clang
 # in CLANG, global_size and local_size are ignored
-from tinygrad.runtime.ops_clang import ClangProgram
+from tinygrad.runtime.ops_clang import ClangProgram, compile_clang

 # a concrete example looks like this, this adds two size 1 RawBuffer
 # first we create two numpy buffers containing 2 and 3
@@ -229,7 +229,7 @@ input_a, input_b = RawMallocBuffer.fromCPU(numpy_a), RawMallocBuffer.fromCPU(num
 output = RawMallocBuffer(1, dtypes.float32)

 # compile the program, run it, and 2+3 does indeed equal 5
-program = ClangProgram("add", f"void add(float *a, float *b, float *c) {{ *a = *b + *c; }}")
+program = ClangProgram("add", compile_clang(f"void add(float *a, float *b, float *c) {{ *a = *b + *c; }}"))
 program(None, None, output, input_a, input_b)  # NOTE: the None are for global_size and local_size
 print(output.toCPU())
 assert output.toCPU()[0] == 5, "it's still 5"