delete half_prekernel (#3388)

* generic rendering of half and bf16

hotfix

* fix uops + regression test

* fix the test for metal's half4

* uop.uop fixup

* mypy with --strict-equality, fix ops_gpu
This commit is contained in:
qazal
2024-02-14 16:40:48 +02:00
committed by GitHub
parent 078a2603d5
commit 27f4de2ce4
4 changed files with 21 additions and 15 deletions

View File

@@ -23,8 +23,8 @@ class CLCompiler(Compiler):
def compile(self, src:str) -> bytes:
program = checked(cl.clCreateProgramWithSource(self.device.context, 1, to_char_p_p([prg_bytes := src.encode()]),
ctypes.byref(ctypes.c_size_t(len(prg_bytes))), ctypes.byref(status := ctypes.c_int32())), status)
status = cl.clBuildProgram(program, 1, ctypes.byref(self.device.device_id), None, cl.clBuildProgram.argtypes[4](), None)
if status != 0:
build_status: int = cl.clBuildProgram(program, 1, ctypes.byref(self.device.device_id), None, cl.clBuildProgram.argtypes[4](), None)
if build_status != 0:
cl.clGetProgramBuildInfo(program, self.device.device_id, cl.CL_PROGRAM_BUILD_LOG, 0, None, ctypes.byref(log_size := ctypes.c_size_t()))
cl.clGetProgramBuildInfo(program, self.device.device_id, cl.CL_PROGRAM_BUILD_LOG, log_size.value, mstr := ctypes.create_string_buffer(log_size.value), None) # noqa: E501
raise RuntimeError(f"OpenCL Compile Error\n\n{ctypes.string_at(mstr, size=log_size.value).decode()}")