optimizer: add matvec optimizations (#1972)

* optimizer: add matvec optimizations

* renderer: fix alignment of shared memory in opencl
This commit is contained in:
Francis Lam
2023-10-04 14:16:27 -07:00
committed by GitHub
parent 3d5127038c
commit 0ba75c4370
4 changed files with 153 additions and 3 deletions

View File

@@ -104,7 +104,7 @@ class CLProgram:
return None
renderer = functools.partial(uops_to_cstyle, CStyleLanguage(
- kernel_prefix = "__kernel ", buffer_prefix = "__global ", smem_prefix = "__local ", arg_int_prefix = "const int",
+ kernel_prefix = "__kernel ", buffer_prefix = "__global ", smem_align = "__attribute__ ((aligned (16))) ", smem_prefix = "__local ", arg_int_prefix = "const int",
half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable",
barrier = "barrier(CLK_LOCAL_MEM_FENCE);", float4 = "(float4)",
gid = [f'get_group_id({i})' for i in range(3)], lid = [f'get_local_id({i})' for i in range(3)], uses_vload=True))