diff --git a/tinygrad/runtime/ops_clang.py b/tinygrad/runtime/ops_clang.py index 5b32796202..2b7eb54952 100644 --- a/tinygrad/runtime/ops_clang.py +++ b/tinygrad/runtime/ops_clang.py @@ -1,17 +1,22 @@ -import os, time, ctypes, hashlib, subprocess, platform +import os, time, ctypes, hashlib, subprocess, platform, tempfile from tinygrad.ops import Compiled from tinygrad.runtime.lib import RawMallocBuffer from tinygrad.codegen.cstyle import CStyleCodegen, CStyleLanguage +args = { + 'Windows': {'cflags':'', 'ext':'dll', 'exp':'__declspec(dllexport)'}, + 'Linux': {'cflags':'-lm -fPIC --rtlib=compiler-rt ', 'ext':'so', 'exp':''}, + 'Darwin': {'cflags':'-lm -fPIC --rtlib=compiler-rt ', 'ext':'dylib', 'exp':''} +}[platform.system()] + class ClangProgram: def __init__(self, name:str, prg:str): - prg = "#include <math.h>\n#define max(x,y) ((x>y)?x:y)\n#define int64 long\n#define half __fp16\n#define uchar unsigned char\n#define bool uchar\n" + prg + prg = '#include <math.h>\n#define max(x,y) ((x>y)?x:y)\n#define int64 long\n#define half __fp16\n#define uchar unsigned char\n#define bool uchar\n' + prg # TODO: is there a way to not write this to disk? 
- fn = f"/tmp/clang_{hashlib.md5(prg.encode('utf-8')).hexdigest()}.{'dylib' if platform.system() == 'Darwin' else 'so'}" - # NOTE: --rtlib=compiler-rt fixes float16 on Linux, it defines __gnu_h2f_ieee and __gnu_f2h_ieee + fn = f"{tempfile.gettempdir()}/clang_{hashlib.md5(prg.encode('utf-8')).hexdigest()}.{args['ext']}" if not os.path.exists(fn): - subprocess.check_output(['clang', '-shared', '-O2', '-Wall','-Werror', '-lm', '--rtlib=compiler-rt', '-fPIC', '-x', 'c', '-', '-o', fn+".tmp"], input=prg.encode('utf-8')) - os.rename(fn+".tmp", fn) + subprocess.check_output(args=('clang -shared -O2 -Wall -Werror -x c '+args['cflags']+' - -o '+fn+'.tmp').split(), input=prg.encode('utf-8')) + os.rename(fn+'.tmp', fn) self.lib = ctypes.CDLL(fn) self.fxn = self.lib[name] @@ -21,7 +26,7 @@ class ClangProgram: if wait: return time.monotonic()-st class ClangCodegen(CStyleCodegen): - lang = CStyleLanguage(buffer_suffix=" restrict") + lang = CStyleLanguage(kernel_prefix=args['exp'], buffer_suffix=" restrict") supports_float4: bool = False ClangBuffer = Compiled(RawMallocBuffer, ClangCodegen, ClangProgram)