nv/cuda compilers touchup (#5759)

* nv/cuda compilers touchup

* fix cuda check + move nv disasm

* remove includes

* fix nvrtc_check
This commit is contained in:
nimlgen
2024-07-28 00:15:28 +03:00
committed by GitHub
parent 3c79faaf77
commit 1903542c2d
3 changed files with 35 additions and 35 deletions

View File

@@ -1,5 +1,5 @@
from __future__ import annotations
import os, ctypes, contextlib, pathlib, re, fcntl, functools, mmap, struct, tempfile, hashlib, subprocess, time, array
import os, ctypes, contextlib, re, fcntl, functools, mmap, struct, time, array
from typing import Tuple, List, Any, cast, Union, Dict
from dataclasses import dataclass
from tinygrad.device import HCQCompiled, HCQAllocator, HCQBuffer, HWCommandQueue, HWComputeQueue, HWCopyQueue, hcq_command, \
@@ -7,7 +7,7 @@ from tinygrad.device import HCQCompiled, HCQAllocator, HCQBuffer, HWCommandQueue
from tinygrad.helpers import getenv, mv_address, init_c_struct_t, to_mv, round_up, data64, data64_le, DEBUG, prod
from tinygrad.renderer.assembly import PTXRenderer
from tinygrad.renderer.cstyle import NVRenderer
from tinygrad.runtime.support.compiler_cuda import CUDACompiler, PTXCompiler, PTX, NVPTXCompiler, NVCompiler
from tinygrad.runtime.support.compiler_cuda import CUDACompiler, PTXCompiler, PTX, NVPTXCompiler, NVCompiler, nv_disassemble
from tinygrad.runtime.autogen import nv_gpu, libc
from tinygrad.runtime.support.elf import elf_loader
if getenv("IOCTL"): import extra.nv_gpu_driver.nv_ioctl # noqa: F401 # pylint: disable=unused-import
@@ -205,12 +205,7 @@ class NVCopyQueue(NVCommandQueue, HWCopyQueue):
class NVProgram(HCQProgram):
def __init__(self, device:NVDevice, name:str, lib:bytes):
self.device, self.name, self.lib = device, name, lib
if DEBUG >= 6:
try:
fn = (pathlib.Path(tempfile.gettempdir()) / f"tinycuda_{hashlib.md5(lib).hexdigest()}").as_posix()
with open(fn + ".cubin", "wb") as f: f.write(lib)
print(subprocess.check_output(["nvdisasm", fn+".cubin"]).decode('utf-8'))
except Exception as e: print("Failed to disasm cubin:", str(e), "Make sure your PATH contains nvdisasm binary of compatible version.")
if DEBUG >= 6: nv_disassemble(lib)
if MOCKGPU: image, sections, relocs = memoryview(bytearray(lib) + b'\x00' * (4 - len(lib)%4)).cast("I"), [], [] # type: ignore
else: image, sections, relocs = elf_loader(self.lib, force_section_align=128)