From 13646ae07a0d0fe03de9398e50ce0ae0a0cdbe13 Mon Sep 17 00:00:00 2001 From: Comma Device Date: Mon, 22 Aug 2022 10:26:42 -0700 Subject: [PATCH] opencl can't optimize that --- tinygrad/llops/ops_gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinygrad/llops/ops_gpu.py b/tinygrad/llops/ops_gpu.py index 58544bb553..f8735e72be 100644 --- a/tinygrad/llops/ops_gpu.py +++ b/tinygrad/llops/ops_gpu.py @@ -130,7 +130,7 @@ class GPUBuffer: buf_types = [f"__global const float *{name}_g" for name, _ in bufs if name not in views or views[name][1]] conv_prg = CLProgram(kernel_name, f"""{chr(10).join([x[0] for x in views.values()])} __kernel void {kernel_name}({','.join(["__global float* restrict output"] + buf_types + (["__local float *temp"] if inter_red > 1 else []))}) {{ - float acc = {GPUBuffer.start_for_op[op]}; int gid = get_global_id(0); int mid = get_global_id(1); + float acc = {GPUBuffer.start_for_op[op]}; int gid = get_global_id(0); {'int mid = get_global_id(1);' if inter_red > 1 else 'int mid = 0;'} for (int idx = gid * {red} + {red//inter_red + 1} * mid; idx < gid * {red} + min({red}, {red//inter_red + 1} * (mid+1)); idx++) {{ {chr(10).join([f' float {name} = ' + (f'get_{name}({name}_g, idx);' if views[name][1] else f'get_{name}(idx);') for name, _ in bufs if name in earlybufs])} acc = {earlycode};