Files
tinygrad/test/external/external_cl_half_max.py
George Hotz bbeba8ec85 use default dict for external_model_benchmark (#2592)
* device default

* Device.DEFAULT

* half max for cuda

* CUDA_INCLUDE_PATH

* closer to working

* cuda fixups

* Update ops_cuda.py
2023-12-03 15:25:43 -08:00

14 lines
370 B
Python

from tinygrad.runtime.ops_gpu import CLDevice, CLProgram, compile_cl
if __name__ == "__main__":
  # Script entry point: hand an OpenCL kernel that calls max() on `half`
  # operands to compile_cl, then wrap the result in a CLProgram.
  # The kernel text is kept exactly as written (including the leading and
  # trailing newline) so the string passed to compile_cl is unchanged.
  kernel_src = """
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void test(__global half *out, __global half *a, __global half *b) {
int gid = get_global_id(0);
out[gid] = max(a[gid], b[gid]);
}
"""
  dev = CLDevice()
  lib = compile_cl(kernel_src)
  # "test" is the name of the __kernel function declared in kernel_src.
  prg = CLProgram(dev, "test", lib)