mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
viz: replace llvm disasm with our disasm (#14325)
This commit is contained in:
@@ -159,6 +159,7 @@ def main() -> None:
|
||||
if not trace: raise RuntimeError(f"no matching trace for {args.kernel}")
|
||||
n = 0
|
||||
for s in trace["steps"]:
|
||||
if "PKTS" in s["name"]: continue
|
||||
print(s["name"])
|
||||
data = viz.get_render(s["query"])
|
||||
print_data(data)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
import multiprocessing, pickle, difflib, os, threading, json, time, sys, webbrowser, socket, argparse, functools, codecs, io, struct
|
||||
import ctypes, pathlib, traceback, itertools, socketserver
|
||||
import pathlib, traceback, itertools, socketserver
|
||||
from contextlib import redirect_stdout, redirect_stderr, contextmanager
|
||||
from decimal import Decimal
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
@@ -345,7 +345,7 @@ def unpack_sqtt(key:tuple[str, int], data:list, p:ProfileProgramEvent) -> tuple[
|
||||
# * init decoder
|
||||
from extra.sqtt.roc import decode
|
||||
base = unwrap(p.base)
|
||||
disasm = {addr+base:inst_disasm for addr,inst_disasm in llvm_disasm(device_props[p.device]["gfx_target_version"], unwrap(p.lib)).items()}
|
||||
disasm = {addr+base:inst_disasm for addr,inst_disasm in amd_disasm(device_props[p.device]["gfx_target_version"], unwrap(p.lib)).items()}
|
||||
rctx = decode(data, {p.name:disasm})
|
||||
cu_events:dict[str, list[ProfileEvent]] = {}
|
||||
# * INST waves
|
||||
@@ -431,29 +431,26 @@ def amd_readelf(lib:bytes) -> list[dict]:
|
||||
".group_segment_fixed_size":"LDS size", ".private_segment_fixed_size":"Scratch size"}
|
||||
return [{"label":label, "value":v} for k,label in keys.items() if (v:=notes["amdhsa.kernels"][0][k]) > 0]
|
||||
|
||||
def llvm_disasm(target:int, lib:bytes) -> dict[int, tuple[str, int]]:
|
||||
from tinygrad.runtime.autogen import llvm
|
||||
def amd_disasm(target:int, lib:bytes) -> dict[int, tuple[str, int]]:
|
||||
from tinygrad.runtime.support.elf import elf_loader
|
||||
llvm.LLVMInitializeAMDGPUTargetInfo()
|
||||
llvm.LLVMInitializeAMDGPUTargetMC()
|
||||
llvm.LLVMInitializeAMDGPUAsmParser()
|
||||
llvm.LLVMInitializeAMDGPUDisassembler()
|
||||
arch = "gfx%d%x%x" % (target // 10000, (target // 100) % 100, target % 100)
|
||||
# pass NULL to callbacks
|
||||
ctx = llvm.LLVMCreateDisasmCPUFeatures("amdgcn-amd-amdhsa".encode(), arch.encode(), "".encode(), None, 0, ctypes.cast(0, llvm.LLVMOpInfoCallback),
|
||||
ctypes.cast(0, llvm.LLVMSymbolLookupCallback))
|
||||
from extra.assembly.amd.decode import detect_format
|
||||
image, sections, _ = elf_loader(lib)
|
||||
text = next((sh.header for sh in sections if sh.name == ".text"), None)
|
||||
text = next((sh for sh in sections if sh.name == ".text"), None)
|
||||
assert text is not None, "no .text section found in ELF"
|
||||
off, sz = text.sh_addr, text.sh_size
|
||||
off, buf = text.header.sh_addr, text.content
|
||||
arch = {11:"rdna3", 12:"rdna4"}.get(target//10000, "cdna")
|
||||
addr_table:dict[int, tuple[str, int]] = {}
|
||||
out = ctypes.create_string_buffer(128)
|
||||
cur_off = off
|
||||
while cur_off < sz + off:
|
||||
view = (ctypes.c_ubyte * ((sz + off) - cur_off)).from_buffer_copy(memoryview(image)[cur_off:])
|
||||
instr_sz = llvm.LLVMDisasmInstruction(ctx, view, ctypes.c_uint64(len(view)), ctypes.c_uint64(0), out, ctypes.c_size_t(128))
|
||||
addr_table[cur_off] = (out.value.decode("utf-8", "replace").strip(), instr_sz)
|
||||
cur_off += instr_sz
|
||||
offset = 0
|
||||
while offset < len(buf):
|
||||
remaining = buf[offset:]
|
||||
fmt = detect_format(remaining, arch)
|
||||
decoded = fmt.from_bytes(remaining)
|
||||
disasm = decoded.disasm()
|
||||
# note: rocprof trace decoder assumes simm16 is a decimal integer, our disasm uses hex
|
||||
# keep the decimal int for backwards compatibility, remove once there's no rocprof decoder
|
||||
if "branch" in disasm: disasm = f"{decoded.op_name.lower()} {decoded.simm16}"
|
||||
addr_table[off+offset] = (disasm, decoded.size())
|
||||
offset += decoded.size()
|
||||
return addr_table
|
||||
|
||||
SOPP_INSTS = {"s_branch", "s_cbranch_scc0", "s_cbranch_scc1", "s_cbranch_vccz", "s_cbranch_vccnz", "s_cbranch_execz", "s_cbranch_execnz"}
|
||||
@@ -488,7 +485,7 @@ def amdgpu_tokenize(st:str) -> list[str]:
|
||||
COND_TAKEN, COND_NOT_TAKEN, UNCOND = range(3)
|
||||
def amdgpu_cfg(lib:bytes, target:int) -> dict:
|
||||
# disassemble
|
||||
pc_table = llvm_disasm(target, lib)
|
||||
pc_table = amd_disasm(target, lib)
|
||||
# get leaders
|
||||
leaders:set[int] = {next(iter(pc_table))}
|
||||
for pc, (asm, sz) in pc_table.items():
|
||||
|
||||
Reference in New Issue
Block a user