cleanup sqtt tooling (#13188)

* cleanup viz/serve.py

* use latest profile in rgptool.py

* unwrap nullable in roc.py, fix disasms typing
This commit is contained in:
qazal
2025-11-10 20:52:57 +08:00
committed by GitHub
parent 845a24dcc6
commit 38a24731a1
3 changed files with 20 additions and 20 deletions

View File

@@ -4,7 +4,7 @@ import argparse, ctypes, struct, hashlib, pickle, code, typing, functools
import tinygrad.runtime.autogen.sqtt as sqtt
from tinygrad.device import ProfileEvent, ProfileDeviceEvent, ProfileProgramEvent
from tinygrad.runtime.ops_amd import ProfileSQTTEvent
from tinygrad.helpers import round_up, flatten, all_same
from tinygrad.helpers import round_up, flatten, all_same, temp
from dataclasses import dataclass
CHUNK_CLASSES = {
@@ -210,7 +210,7 @@ class RGP:
flags=0,
trace_shader_core_clock=0x93f05080,
trace_memory_clock=0x4a723a40,
device_id={110000: 0x744c, 110003: 0x7480, 120001: 0x7550}[device_props['gfx_target_version']],
device_id={110000: 0x744c, 110003: 0x7480, 120001: 0x7550, 120000: 0x7550}[device_props['gfx_target_version']],
device_revision_id=0xc8,
vgprs_per_simd=1536,
sgprs_per_simd=128*16,
@@ -324,7 +324,7 @@ class RGP:
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='rgptool', description='A tool to create (from pickled tinygrad profile), inspect and modify Radeon GPU Profiler files')
parser.add_argument('command')
parser.add_argument('input')
parser.add_argument('input', nargs='?', default=temp("profile.pkl", append_user=True))
parser.add_argument('-d', '--device')
parser.add_argument('-o', '--output')
args = parser.parse_args()
@@ -346,3 +346,4 @@ if __name__ == '__main__':
if args.output is not None:
with open(args.output, 'wb+') as fd: fd.write(rgp.to_bytes())
print(f"Saved to {args.output}")

View File

@@ -60,7 +60,7 @@ class _ROCParseCtx:
def __init__(self, dev_evs:dict[str, ProfileDeviceEvent], sqtt_evs:list[ProfileSQTTEvent], prog_evs:list[ProfileProgramEvent]):
self.dev_evs, self.sqtt_evs, self.prog_evs = dev_evs, iter(sqtt_evs), prog_evs
self.wave_events:dict[PrgExec, dict[int, InstInfo]] = {}
self.disasms:dict[int, tuple[str, int]] = {}
self.disasms:dict[tuple[str, int], tuple[str, int]] = {}
self.inst_execs:dict[PrgExec, list[InstExec]] = {}
for prog in prog_evs:
@@ -85,13 +85,13 @@ class _ROCParseCtx:
for j in range(ev.instructions_size):
inst_ev = ev.instructions_array[j]
inst_typ = rocprof.rocprofiler_thread_trace_decoder_inst_category_t__enumvalues[inst_ev.category]
inst_disasm = self.disasms[(self.active_kern, inst_ev.pc.address)][0]
inst_disasm = self.disasms[(unwrap(self.active_kern), unwrap(inst_ev.pc.address))][0]
asm.setdefault(inst_ev.pc.address, InstInfo(typ=inst_typ, inst=inst_disasm))
asm[inst_ev.pc.address].on_ev(inst_ev)
inst_execs.append(InstExec(inst_typ, inst_disasm, inst_ev.stall, inst_ev.duration, inst_ev.time))
if ev.instructions_size > 0:
self.wave_events[key:=PrgExec(self.active_kern, ev.wave_id, ev.cu, ev.simd)] = asm
self.wave_events[key:=PrgExec(unwrap(self.active_kern), ev.wave_id, ev.cu, ev.simd)] = asm
self.inst_execs[key] = inst_execs
def decode(profile:list[ProfileEvent]) -> _ROCParseCtx:
@@ -125,7 +125,7 @@ def decode(profile:list[ProfileEvent]) -> _ROCParseCtx:
@rocprof.rocprof_trace_decoder_isa_callback_t
def isa_cb(instr_ptr, mem_size_ptr, size_ptr, pc, data_ptr):
instr, mem_size_ptr[0] = ROCParseCtx.disasms[(ROCParseCtx.active_kern, pc.address)]
instr, mem_size_ptr[0] = ROCParseCtx.disasms[(unwrap(ROCParseCtx.active_kern), pc.address)]
# this is the number of bytes to next instruction, set to 0 for end_pgm
if instr == "s_endpgm": mem_size_ptr[0] = 0

View File

@@ -204,20 +204,19 @@ def load_sqtt(profile:list[ProfileEvent]) -> None:
return ctxs.append({"name":"Counters", "steps":[step]})
try: from extra.sqtt.roc import decode
except Exception: return err("DECODER IMPORT ISSUE")
try:
rctx = decode(profile)
steps:list[dict] = []
for k,v in rctx.inst_execs.items():
if k.wave == 0:
if (r:=ref_map.get(name:=k.name)): name = ctxs[r]["name"]
steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
"data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}})
rows = [(e.inst, e.time, e.time-v[i-1].time if i else 0, e.dur, e.stall, str(e.typ).split("_")[-1]) for i,e in enumerate(v)]
summary = [{"label":"Total Cycles", "value":v[-1].time-v[0].time if v else 0}, {"label":"CU", "value":k.cu}, {"label":"SIMD", "value":k.simd}]
steps.append({"name":f"Wave {k.wave}", "depth":1, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
"data":{"rows":rows, "cols":["Instruction", "Clk", "Wait", "Duration", "Stall", "Type"], "summary":summary}})
if not steps: return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded")
try: rctx = decode(profile)
except Exception: return err("DECODER ERROR")
if not rctx.inst_execs: return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded")
steps:list[dict] = []
for k,v in rctx.inst_execs.items():
if k.wave == 0:
if (r:=ref_map.get(name:=k.name)): name = ctxs[r]["name"]
steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
"data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}})
rows = [(e.inst, e.time, e.time-v[i-1].time if i else 0, e.dur, e.stall, str(e.typ).split("_")[-1]) for i,e in enumerate(v)]
summary = [{"label":"Total Cycles", "value":v[-1].time-v[0].time if v else 0}, {"label":"CU", "value":k.cu}, {"label":"SIMD", "value":k.simd}]
steps.append({"name":f"Wave {k.wave}", "depth":1, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
"data":{"rows":rows, "cols":["Instruction", "Clk", "Wait", "Duration", "Stall", "Type"], "summary":summary}})
ctxs.append({"name":"Counters", "steps":steps})
def get_profile(profile:list[ProfileEvent]) -> bytes|None: