From cb7289f9c99be4871517b23e23eb85e2608feb33 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Sat, 4 May 2024 08:38:01 -0700 Subject: [PATCH] remove clang program header (#4422) * remove clang program header * proper max * bools are numbers * fix compile enet --- examples/efficientnet.py | 7 ++++--- extra/export_model.py | 3 +-- test/unit/test_disk_tensor.py | 4 ++-- tinygrad/renderer/cstyle.py | 9 ++++++++- tinygrad/runtime/ops_clang.py | 10 ++++------ 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/examples/efficientnet.py b/examples/efficientnet.py index 863af3d124..e8e8bd916b 100644 --- a/examples/efficientnet.py +++ b/examples/efficientnet.py @@ -83,6 +83,7 @@ if __name__ == "__main__": cv2.destroyAllWindows() else: img = Image.open(fetch(url)) - with Timing("did inference in "): - out, _ = infer(model, img) - print(np.argmax(out), np.max(out), lbls[np.argmax(out)]) + for i in range(getenv("CNT", 1)): + with Timing("did inference in "): + out, _ = infer(model, img) + print(np.argmax(out), np.max(out), lbls[np.argmax(out)]) diff --git a/extra/export_model.py b/extra/export_model.py index 20b1b78096..ba97111f3d 100644 --- a/extra/export_model.py +++ b/extra/export_model.py @@ -64,8 +64,7 @@ def jit_model(model, *args) -> Tuple[TinyJit,Dict[int,str]]: return run, special_names def export_model_clang(functions:Dict[str,str], statements:Dict[str,Tuple[str,int,int]], bufs:Dict[str,Tuple[str,int,int]], bufs_to_save:Dict[str,Tensor], input_names:List[str], output_names:List[str]) -> str: - from tinygrad.runtime.ops_clang import CLANG_PROGRAM_HEADER - cprog = [CLANG_PROGRAM_HEADER] + cprog = ["#include <tgmath.h>"] for name,cl in bufs_to_save.items(): weight = ''.join(["\\x%02X"%x for x in bytes(cl._buf)]) diff --git a/test/unit/test_disk_tensor.py b/test/unit/test_disk_tensor.py index 6095f716e8..4017aa6eaf 100644 --- a/test/unit/test_disk_tensor.py +++ b/test/unit/test_disk_tensor.py @@ -279,8 +279,8 @@ class 
TestDiskTensor(unittest.TestCase): np.testing.assert_array_equal(t.numpy(), np.array([3] * 10)) def test_bitcast(self): - with open(temp('bf16'), "wb") as f: f.write(bytes(range(10,20))) - t = Tensor.empty(5, dtype=dtypes.int16, device=f"disk:{temp('bf16')}") + with open(temp('range_1020'), "wb") as f: f.write(bytes(range(10,20))) + t = Tensor.empty(5, dtype=dtypes.int16, device=f"disk:{temp('range_1020')}") ret = t.to("CLANG").bitcast(dtypes.uint16) + 1 assert ret.tolist() == [2827, 3341, 3855, 4369, 4883] diff --git a/tinygrad/renderer/cstyle.py b/tinygrad/renderer/cstyle.py index 3b81da302a..71f13fc9a8 100644 --- a/tinygrad/renderer/cstyle.py +++ b/tinygrad/renderer/cstyle.py @@ -45,7 +45,8 @@ class CStyleLanguage(NamedTuple): if math.isnan(x): val = "NAN" elif math.isinf(x): val = ("-" if x < 0 else "") + "INFINITY" elif dtype == dtypes.float64: val = f"{x}" - else: val = f"{x}f" if dtypes.is_float(dtype) else f"{x}" if dtypes.is_int(dtype) else f"{x}".lower() + elif dtype == dtypes.bool: val = "1" if x else "0" + else: val = f"{x}f" if dtypes.is_float(dtype) else f"{x}" return (self.render_cast([val] * dtype.count, dtype) if dtype.count > 1 or dtype not in [dtypes.float, dtypes.int, dtypes.bool] else val) # returns a str expression of the loaded value with the output type @@ -174,6 +175,12 @@ def uops_to_cstyle(lang:CStyleLanguage, function_name:str, uops:UOpGraph) -> str return lang.render_kernel(function_name, kernel, bufs, uops) +class ClangLanguage(CStyleLanguage): + buffer_suffix = " restrict" + type_map = {dtypes.bool:"_Bool", dtypes.half:"__fp16"} + code_for_op = {**CStyleLanguage().code_for_op, BinaryOps.MAX: lambda a,b,dtype: f"(({a}>{b})?{a}:{b})"} +ClangRenderer = functools.partial(uops_to_cstyle, ClangLanguage()) + class OpenCLLanguage(CStyleLanguage): kernel_prefix = "__kernel " buffer_prefix = "__global " diff --git a/tinygrad/runtime/ops_clang.py b/tinygrad/runtime/ops_clang.py index 30a3446684..b13e026122 100644 --- 
a/tinygrad/runtime/ops_clang.py +++ b/tinygrad/runtime/ops_clang.py @@ -1,18 +1,16 @@ import ctypes, subprocess, pathlib, tempfile from tinygrad.device import Compiled, MallocAllocator, Compiler, CompilerOptions from tinygrad.helpers import cpu_time_execution -from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage - -CLANG_PROGRAM_HEADER = '#include \n#include \n#define max(x,y) ((x>y)?x:y)\n#define half __fp16\n' +from tinygrad.renderer.cstyle import ClangRenderer class ClangCompiler(Compiler): compiler_opts = CompilerOptions("CLANG", supports_float4=False, has_local=False) - def render(self, name:str, uops) -> str: return CLANG_PROGRAM_HEADER + uops_to_cstyle(CStyleLanguage(buffer_suffix=" restrict"), name, uops) + def render(self, name:str, uops) -> str: return ClangRenderer(name, uops) def compile(self, src:str) -> bytes: # TODO: remove file write. sadly clang doesn't like the use of /dev/stdout here with tempfile.NamedTemporaryFile(delete=True) as output_file: - subprocess.check_output(args=('clang -shared -march=native -O2 -Wall -Werror -x c -fPIC - -o '+ str(output_file.name)).split(), - input=src.encode('utf-8')) + subprocess.check_output(['clang', '-include', 'tgmath.h', '-shared', '-march=native', '-O2', '-Wall', '-Werror', '-x', 'c', '-fPIC', '-', + '-o', str(output_file.name)], input=src.encode('utf-8')) return pathlib.Path(output_file.name).read_bytes() class ClangProgram: