From 1fa4f161fed3e712ae67e5e44ce86d967f368479 Mon Sep 17 00:00:00 2001
From: Marcus Asteborg
Date: Sat, 16 Dec 2023 23:48:44 -0800
Subject: [PATCH] Update CLProgram to use unsigned long long for event profiling (#2808)

On Windows, the unsigned long type is 32-bit, which is not compatible with the required data size for event profiling.
---
 tinygrad/runtime/ops_gpu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tinygrad/runtime/ops_gpu.py b/tinygrad/runtime/ops_gpu.py
index fa72f2cfb4..2ecd128823 100644
--- a/tinygrad/runtime/ops_gpu.py
+++ b/tinygrad/runtime/ops_gpu.py
@@ -51,8 +51,8 @@ class CLProgram:
     check(cl.clEnqueueNDRangeKernel(self.device.queue, self.kernel, len(global_size), None, (ctypes.c_size_t * len(global_size))(*global_size), (ctypes.c_size_t * len(local_size))(*local_size) if local_size else None, 0, None, event)) # noqa: E501
     if wait:
       check(cl.clWaitForEvents(1, ctypes.byref(event)))
-      start = init_c_var(ctypes.c_ulong(), lambda x: check(cl.clGetEventProfilingInfo(event, cl.CL_PROFILING_COMMAND_START, ctypes.sizeof(x), ctypes.byref(x), None))) # noqa: E501
-      end = init_c_var(ctypes.c_ulong(), lambda x: check(cl.clGetEventProfilingInfo(event, cl.CL_PROFILING_COMMAND_END, ctypes.sizeof(x), ctypes.byref(x), None))) # noqa: E501
+      start = init_c_var(ctypes.c_uint64(), lambda x: check(cl.clGetEventProfilingInfo(event, cl.CL_PROFILING_COMMAND_START, ctypes.sizeof(x), ctypes.byref(x), None))) # noqa: E501
+      end = init_c_var(ctypes.c_uint64(), lambda x: check(cl.clGetEventProfilingInfo(event, cl.CL_PROFILING_COMMAND_END, ctypes.sizeof(x), ctypes.byref(x), None))) # noqa: E501
       return float(end.value-start.value) * OSX_TIMING_RATIO * 1e-9
     return None
 