ops_cuda: fix race condition on cubin file read when testing with multiple cores (#1172)

This commit is contained in:
cloud11665
2023-07-07 21:05:16 +02:00
committed by GitHub
parent aa60feda48
commit 884b5965de

View File

@@ -1,7 +1,5 @@
import subprocess
import subprocess, time, re, hashlib, tempfile
from typing import Optional
import time
import re
import numpy as np
from pycuda.compiler import compile as cuda_compile # type: ignore
from tinygrad.helpers import DEBUG, getenv, fromimport, colored
@@ -56,9 +54,10 @@ class CUDAProgram:
def __init__(self, name:str, prg:str, binary=False):
try:
if DEBUG >= 6:
with open("/tmp/cubin", "wb") as f:
fn = f"{tempfile.gettempdir()}/tinycuda_{hashlib.md5(prg.encode('utf-8')).hexdigest()}"
with open(fn, "wb") as f:
f.write(cuda_compile(prg, target="cubin", no_extern_c=True))
sass = subprocess.check_output(['nvdisasm', '/tmp/cubin']).decode('utf-8')
sass = subprocess.check_output(['nvdisasm', fn]).decode('utf-8')
print(sass)
if not binary: prg = cuda_compile(prg, target="ptx", no_extern_c=True, options=['-Wno-deprecated-gpu-targets']).decode('utf-8')
except cuda.CompileError as e: