rocm: disassembler for shader

This commit is contained in:
George Hotz
2023-05-06 18:56:09 +00:00
parent 7fbf96b992
commit 5190037cbc
3 changed files with 32 additions and 3 deletions

View File

@@ -1,5 +1,5 @@
from __future__ import annotations
import platform
import platform, pathlib
import numpy as np
import pyopencl as cl # type: ignore
from typing import Optional, List
@@ -12,6 +12,11 @@ OSX = platform.system() == "Darwin"
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
FLOAT16 = getenv("FLOAT16", 0)
# TODO: on AMD, if you fork and exit the child process after creating anything with cl (pyopencl), it hangs on e.wait()
if DEBUG >= 5:
from extra.helpers import enable_early_exec
early_exec = enable_early_exec()
class _CL:
def __init__(self):
platforms: List[List[cl.Device]] = [y for y in ([x.get_devices(device_type=cl.device_type.GPU) for x in cl.get_platforms()] + [x.get_devices(device_type=cl.device_type.CPU) for x in cl.get_platforms()]) if len(y)]
@@ -54,6 +59,9 @@ class CLProgram:
if 'Adreno' in CL.cl_ctx.devices[0].name:
from disassemblers.adreno import disasm
disasm(self.binary())
elif 'gfx1100' in CL.cl_ctx.devices[0].name:
asm = early_exec(([pathlib.Path(__file__).parent.parent.parent / "extra/rocm/build/llvm-project/bin/llvm-objdump", '-d', '-'], self.binary()))
print('\n'.join([x for x in asm.decode('utf-8').split("\n") if 's_code_end' not in x]))
else:
# print the PTX for NVIDIA. TODO: probably broken for everything else
print(self.binary().decode('utf-8'))