ptx timing vs cuda timing (#3659)

This commit is contained in:
George Hotz
2024-03-08 10:17:49 -08:00
committed by GitHub
parent daa4034e80
commit ac02e7347d
3 changed files with 65 additions and 1 deletion

View File

@@ -77,6 +77,7 @@ class CUDAProgram:
self.module = cuda.CUmodule()
status = cuda.cuModuleLoadData(ctypes.byref(self.module), lib)
if status != 0:
del self.module
cuda_disassemble(lib, device.arch)
raise RuntimeError("module load failed")
check(cuda.cuModuleGetFunction(ctypes.byref(prg := cuda.CUfunction()), self.module, name.encode("utf-8")))