From aad005e22089dcc14736612946f72c0fc0a100c0 Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 13 Dec 2023 20:23:27 -0500 Subject: [PATCH] set default str for CStyleLanguage.arg_int_prefix (#2756) it's the same `const int` for clang, opencl, cuda and hip; metal overwrites with `constant int&`, and webgl has its own thing --- tinygrad/renderer/cstyle.py | 5 +---- tinygrad/runtime/ops_clang.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/tinygrad/renderer/cstyle.py b/tinygrad/renderer/cstyle.py index c8ef1e723a..34b7f55f20 100644 --- a/tinygrad/renderer/cstyle.py +++ b/tinygrad/renderer/cstyle.py @@ -14,7 +14,7 @@ class CStyleLanguage(NamedTuple): smem_align: str = "" smem_prefix: str = "" smem_prefix_for_cast: bool = True - arg_int_prefix: str = "" + arg_int_prefix: str = "const int" barrier: str = "" xid: List[str] = [] gid: List[str] = [] @@ -216,7 +216,6 @@ class OpenCLLanguage(CStyleLanguage): buffer_prefix = "__global " smem_align = "__attribute__ ((aligned (16))) " smem_prefix = "__local " - arg_int_prefix = "const int" half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable" barrier = "barrier(CLK_LOCAL_MEM_FENCE);" float4 = "(float4)" @@ -250,7 +249,6 @@ class CUDALanguage(CStyleLanguage): kernel_prefix = "#define INFINITY (__int_as_float(0x7f800000))\n#define NAN (__int_as_float(0x7fffffff))\nextern \"C\" __global__ " smem_prefix = "__shared__ " smem_prefix_for_cast = False - arg_int_prefix = "const int" barrier = "__syncthreads();" float4 = "make_float4" gid = [f'blockIdx.{chr(120+i)}' for i in range(3)] @@ -284,7 +282,6 @@ class HIPLanguage(CStyleLanguage): float4 = "make_float4" uses_vload=True uses_ptr_arithmetic=True - arg_int_prefix = "const int" half_prekernel = "#include \n" + """ typedef union { struct { half x, y, z, w; } __attribute__((aligned(8))); half data[4]; } half4; __device__ half4 make_half4(half x, half y, half z, half w) { return {x, y, z, w}; } diff --git a/tinygrad/runtime/ops_clang.py 
b/tinygrad/runtime/ops_clang.py index 62d3a797aa..2be1fcbf98 100644 --- a/tinygrad/runtime/ops_clang.py +++ b/tinygrad/runtime/ops_clang.py @@ -24,5 +24,5 @@ class ClangProgram: def __call__(self, *bufs, vals=(), wait=False): return cpu_time_execution(lambda: self.fxn(*bufs, *vals), enable=wait) -renderer = functools.partial(uops_to_cstyle, CStyleLanguage(buffer_suffix=" restrict", arg_int_prefix="const int")) +renderer = functools.partial(uops_to_cstyle, CStyleLanguage(buffer_suffix=" restrict")) ClangDevice = Compiled(MallocAllocator, LinearizerOptions(supports_float4=False, has_local=False), renderer, compile_clang, ClangProgram)