viz: replace llvm disasm with our disasm (#14325)

This commit is contained in:
qazal
2026-01-24 23:56:56 -05:00
committed by GitHub
parent 4280a8eef2
commit 647e527a7e
2 changed files with 20 additions and 22 deletions

View File

@@ -159,6 +159,7 @@ def main() -> None:
if not trace: raise RuntimeError(f"no matching trace for {args.kernel}")
n = 0
for s in trace["steps"]:
if "PKTS" in s["name"]: continue
print(s["name"])
data = viz.get_render(s["query"])
print_data(data)

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
import multiprocessing, pickle, difflib, os, threading, json, time, sys, webbrowser, socket, argparse, functools, codecs, io, struct
import ctypes, pathlib, traceback, itertools, socketserver
import pathlib, traceback, itertools, socketserver
from contextlib import redirect_stdout, redirect_stderr, contextmanager
from decimal import Decimal
from urllib.parse import parse_qs, urlparse
@@ -345,7 +345,7 @@ def unpack_sqtt(key:tuple[str, int], data:list, p:ProfileProgramEvent) -> tuple[
# * init decoder
from extra.sqtt.roc import decode
base = unwrap(p.base)
disasm = {addr+base:inst_disasm for addr,inst_disasm in llvm_disasm(device_props[p.device]["gfx_target_version"], unwrap(p.lib)).items()}
disasm = {addr+base:inst_disasm for addr,inst_disasm in amd_disasm(device_props[p.device]["gfx_target_version"], unwrap(p.lib)).items()}
rctx = decode(data, {p.name:disasm})
cu_events:dict[str, list[ProfileEvent]] = {}
# * INST waves
@@ -431,29 +431,26 @@ def amd_readelf(lib:bytes) -> list[dict]:
".group_segment_fixed_size":"LDS size", ".private_segment_fixed_size":"Scratch size"}
return [{"label":label, "value":v} for k,label in keys.items() if (v:=notes["amdhsa.kernels"][0][k]) > 0]
def amd_disasm(target:int, lib:bytes) -> dict[int, tuple[str, int]]:
  """Disassemble the `.text` section of the ELF image `lib`.

  Returns a table keyed by instruction address (the section's `sh_addr` plus the offset into the section content) whose
  values are `(disassembly, size)` pairs, where `size` is whatever the decoded instruction reports through `size()`.

  `target` is the gfx target version; `target//10000` picks the decoder arch (11 -> "rdna3", 12 -> "rdna4", else "cdna").
  Asserts that the ELF has a `.text` section and raises RuntimeError if an instruction decodes to a non-positive size.
  """
  from tinygrad.runtime.support.elf import elf_loader
  from extra.assembly.amd.decode import detect_format
  # only the section list is needed, the loaded image and the third return value are unused
  _, sections, _ = elf_loader(lib)
  text = next((sh for sh in sections if sh.name == ".text"), None)
  assert text is not None, "no .text section found in ELF"
  off, buf = text.header.sh_addr, text.content
  arch = {11:"rdna3", 12:"rdna4"}.get(target//10000, "cdna")
  addr_table:dict[int, tuple[str, int]] = {}
  offset = 0
  while offset < len(buf):
    remaining = buf[offset:]
    decoded = detect_format(remaining, arch).from_bytes(remaining)
    disasm, sz = decoded.disasm(), decoded.size()
    # a non-positive size would never advance `offset`, fail loudly instead of looping forever
    if sz <= 0: raise RuntimeError(f"instruction at .text offset {offset:#x} decoded with size {sz}")
    # note: rocprof trace decoder assumes simm16 is a decimal integer, our disasm uses hex
    # keep the decimal int for backwards compatibility, remove once there's no rocprof decoder
    if "branch" in disasm: disasm = f"{decoded.op_name.lower()} {decoded.simm16}"
    addr_table[off+offset] = (disasm, sz)
    offset += sz
  return addr_table
SOPP_INSTS = {"s_branch", "s_cbranch_scc0", "s_cbranch_scc1", "s_cbranch_vccz", "s_cbranch_vccnz", "s_cbranch_execz", "s_cbranch_execnz"}
@@ -488,7 +485,7 @@ def amdgpu_tokenize(st:str) -> list[str]:
COND_TAKEN, COND_NOT_TAKEN, UNCOND = range(3)
def amdgpu_cfg(lib:bytes, target:int) -> dict:
# disassemble
pc_table = llvm_disasm(target, lib)
pc_table = amd_disasm(target, lib)
# get leaders
leaders:set[int] = {next(iter(pc_table))}
for pc, (asm, sz) in pc_table.items():