mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
nv/cuda compilers touchup (#5759)
* nv/cuda compilers touchup
* fix cuda check + move nv disasm
* remove includes
* fix nvrtc_check
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from __future__ import annotations
|
||||
import os, ctypes, contextlib, pathlib, re, fcntl, functools, mmap, struct, tempfile, hashlib, subprocess, time, array
|
||||
import os, ctypes, contextlib, re, fcntl, functools, mmap, struct, time, array
|
||||
from typing import Tuple, List, Any, cast, Union, Dict
|
||||
from dataclasses import dataclass
|
||||
from tinygrad.device import HCQCompiled, HCQAllocator, HCQBuffer, HWCommandQueue, HWComputeQueue, HWCopyQueue, hcq_command, \
|
||||
@@ -7,7 +7,7 @@ from tinygrad.device import HCQCompiled, HCQAllocator, HCQBuffer, HWCommandQueue
|
||||
from tinygrad.helpers import getenv, mv_address, init_c_struct_t, to_mv, round_up, data64, data64_le, DEBUG, prod
|
||||
from tinygrad.renderer.assembly import PTXRenderer
|
||||
from tinygrad.renderer.cstyle import NVRenderer
|
||||
from tinygrad.runtime.support.compiler_cuda import CUDACompiler, PTXCompiler, PTX, NVPTXCompiler, NVCompiler
|
||||
from tinygrad.runtime.support.compiler_cuda import CUDACompiler, PTXCompiler, PTX, NVPTXCompiler, NVCompiler, nv_disassemble
|
||||
from tinygrad.runtime.autogen import nv_gpu, libc
|
||||
from tinygrad.runtime.support.elf import elf_loader
|
||||
if getenv("IOCTL"): import extra.nv_gpu_driver.nv_ioctl # noqa: F401 # pylint: disable=unused-import
|
||||
@@ -205,12 +205,7 @@ class NVCopyQueue(NVCommandQueue, HWCopyQueue):
|
||||
class NVProgram(HCQProgram):
|
||||
def __init__(self, device:NVDevice, name:str, lib:bytes):
|
||||
self.device, self.name, self.lib = device, name, lib
|
||||
if DEBUG >= 6:
|
||||
try:
|
||||
fn = (pathlib.Path(tempfile.gettempdir()) / f"tinycuda_{hashlib.md5(lib).hexdigest()}").as_posix()
|
||||
with open(fn + ".cubin", "wb") as f: f.write(lib)
|
||||
print(subprocess.check_output(["nvdisasm", fn+".cubin"]).decode('utf-8'))
|
||||
except Exception as e: print("Failed to disasm cubin:", str(e), "Make sure your PATH contains nvdisasm binary of compatible version.")
|
||||
if DEBUG >= 6: nv_disassemble(lib)
|
||||
|
||||
if MOCKGPU: image, sections, relocs = memoryview(bytearray(lib) + b'\x00' * (4 - len(lib)%4)).cast("I"), [], [] # type: ignore
|
||||
else: image, sections, relocs = elf_loader(self.lib, force_section_align=128)
|
||||
|
||||
Reference in New Issue
Block a user