AMDCompiler refactor (no_comgr prereq) (#9497)

* add amdgpu_disassemble to helpers

* refactor hip compiler

---------

Co-authored-by: b1tg <b1tg@users.noreply.github.com>
This commit is contained in:
b1tg
2025-03-20 09:44:07 +08:00
committed by GitHub
parent 8c0d0a122c
commit bd731a8624
3 changed files with 10 additions and 8 deletions

View File

@@ -10,7 +10,7 @@ from tinygrad.helpers import getenv, to_mv, round_up, data64_le, mv_address, DEB
from tinygrad.renderer.cstyle import AMDRenderer
from tinygrad.runtime.autogen import kfd, hsa, amd_gpu, libc, pci, vfio, sqtt
from tinygrad.runtime.autogen.am import am, gc_11_0_0
from tinygrad.runtime.support.compiler_hip import AMDCompiler
from tinygrad.runtime.support.compiler_amd import HIPCompiler
from tinygrad.runtime.support.elf import elf_loader
from tinygrad.runtime.support.am.amdev import AMDev, AMMapping
if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import
@@ -706,7 +706,7 @@ class AMDDevice(HCQCompiled):
self.sdma_queue = self.create_queue(kfd.KFD_IOC_QUEUE_TYPE_SDMA, 0x800000)
super().__init__(device, AMDAllocator(self), AMDRenderer(self.arch), AMDCompiler(self.arch), functools.partial(AMDProgram, self),
super().__init__(device, AMDAllocator(self), AMDRenderer(self.arch), HIPCompiler(self.arch), functools.partial(AMDProgram, self),
AMDSignal, AMDComputeQueue, AMDCopyQueue)
# Scratch setup

View File

@@ -2,7 +2,7 @@ import ctypes, functools
from tinygrad.helpers import init_c_var, from_mv, init_c_struct_t, getenv
from tinygrad.device import Compiled, LRUAllocator, BufferSpec
from tinygrad.runtime.autogen import hip
from tinygrad.runtime.support.compiler_hip import AMDCompiler
from tinygrad.runtime.support.compiler_amd import HIPCompiler
from tinygrad.renderer.cstyle import HIPRenderer
if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import
@@ -14,7 +14,7 @@ class HIPDevice(Compiled):
self.device_id = int(device.split(":")[1]) if ":" in device else 0
self.arch = init_c_var(hip.hipDeviceProp_t(), lambda x: check(hip.hipGetDeviceProperties(x, self.device_id))).gcnArchName.decode()
self.time_event_st, self.time_event_en = [init_c_var(hip.hipEvent_t(), lambda x: hip.hipEventCreate(ctypes.byref(x), 0)) for _ in range(2)]
super().__init__(device, HIPAllocator(self), HIPRenderer(self.arch), AMDCompiler(self.arch), functools.partial(HIPProgram, self))
super().__init__(device, HIPAllocator(self), HIPRenderer(self.arch), HIPCompiler(self.arch), functools.partial(HIPProgram, self))
def synchronize(self):
check(hip.hipSetDevice(self.device_id))
check(hip.hipDeviceSynchronize())

View File

@@ -2,6 +2,10 @@ import ctypes, subprocess
import tinygrad.runtime.autogen.comgr as comgr
from tinygrad.device import Compiler, CompileError
def amdgpu_disassemble(lib:bytes):
  """Print the llvm-objdump disassembly of `lib`, leaving out `s_code_end` lines.

  `lib` is fed on stdin to `/opt/rocm/llvm/bin/llvm-objdump -d -`. The tool's
  output is decoded as UTF-8, split on newlines, and every line that contains
  the text `s_code_end` is dropped before the rest is printed to stdout.

  Nothing is returned. Any exception raised by `subprocess.check_output`
  (for example when the objdump binary is missing or exits non-zero) propagates.
  """
  objdump_cmd = ["/opt/rocm/llvm/bin/llvm-objdump", '-d', '-']
  listing = subprocess.check_output(objdump_cmd, input=lib).decode('utf-8')
  # filter out the s_code_end lines so they do not clutter the printed listing
  kept = (line for line in listing.split("\n") if 's_code_end' not in line)
  print('\n'.join(kept))
def check(status):
if status != 0:
comgr.amd_comgr_status_string(status, ctypes.byref(status_str := ctypes.POINTER(ctypes.c_char)()))
@@ -56,13 +60,11 @@ def compile_hip(prg:str, arch="gfx1100", asm=False) -> bytes:
check(comgr.amd_comgr_destroy_action_info(action_info))
return ret
class AMDCompiler(Compiler):
class HIPCompiler(Compiler):
def __init__(self, arch:str):
self.arch = arch
super().__init__(f"compile_hip_{self.arch}")
def compile(self, src:str) -> bytes:
try: return compile_hip(src, self.arch, src.split('\n', 1)[0].strip() == '.text')
except RuntimeError as e: raise CompileError(e) from e
def disassemble(self, lib:bytes):
asm = subprocess.check_output(["/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], input=lib)
print('\n'.join([x for x in asm.decode('utf-8').split("\n") if 's_code_end' not in x]))
def disassemble(self, lib:bytes): amdgpu_disassemble(lib)