mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-02-10 22:54:59 -05:00
Fix cuda (#836)
* Disabled float4 ALU ops for CUDA; small fix to add half_prekernel before kernel_prefix.
* Added supports_float4_alu option, and disabled it for ops_cuda.
This commit is contained in:
@@ -91,5 +91,6 @@ class CLCodegen(CStyleCodegen):
|
||||
half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable",
|
||||
barrier = "barrier(CLK_LOCAL_MEM_FENCE);", float4 = "(float4)",
|
||||
gid = [f'get_global_id({i})' for i in range(3)], lid = [f'get_local_id({i})' for i in range(3)], uses_vload=True)
|
||||
|
||||
supports_float4_alu = True
|
||||
supports_float4 = True
|
||||
GPUBuffer = Compiled(CLBuffer, CLCodegen, CLProgram, CL.synchronize)
|
||||
|
||||
Reference in New Issue
Block a user