From 38a24731a15db69e45e9dfedeea5ddd651be67f9 Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Mon, 10 Nov 2025 20:52:57 +0800 Subject: [PATCH] cleanup sqtt tooling (#13188) * cleanup viz/serve.py * use latest profile in rgptool.py * unwrap nullable in roc.py, fix disasms typing --- extra/sqtt/rgptool.py | 7 ++++--- extra/sqtt/roc.py | 8 ++++---- tinygrad/viz/serve.py | 25 ++++++++++++------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/extra/sqtt/rgptool.py b/extra/sqtt/rgptool.py index cd06f0dffd..4148a2ccc5 100755 --- a/extra/sqtt/rgptool.py +++ b/extra/sqtt/rgptool.py @@ -4,7 +4,7 @@ import argparse, ctypes, struct, hashlib, pickle, code, typing, functools import tinygrad.runtime.autogen.sqtt as sqtt from tinygrad.device import ProfileEvent, ProfileDeviceEvent, ProfileProgramEvent from tinygrad.runtime.ops_amd import ProfileSQTTEvent -from tinygrad.helpers import round_up, flatten, all_same +from tinygrad.helpers import round_up, flatten, all_same, temp from dataclasses import dataclass CHUNK_CLASSES = { @@ -210,7 +210,7 @@ class RGP: flags=0, trace_shader_core_clock=0x93f05080, trace_memory_clock=0x4a723a40, - device_id={110000: 0x744c, 110003: 0x7480, 120001: 0x7550}[device_props['gfx_target_version']], + device_id={110000: 0x744c, 110003: 0x7480, 120001: 0x7550, 120000: 0x7550}[device_props['gfx_target_version']], device_revision_id=0xc8, vgprs_per_simd=1536, sgprs_per_simd=128*16, @@ -324,7 +324,7 @@ class RGP: if __name__ == '__main__': parser = argparse.ArgumentParser(prog='rgptool', description='A tool to create (from pickled tinygrad profile), inspect and modify Radeon GPU Profiler files') parser.add_argument('command') - parser.add_argument('input') + parser.add_argument('input', nargs='?', default=temp("profile.pkl", append_user=True)) parser.add_argument('-d', '--device') parser.add_argument('-o', '--output') args = parser.parse_args() @@ -346,3 +346,4 @@ if __name__ == '__main__': if args.output is not None: with open(args.output, 'wb+') as fd: fd.write(rgp.to_bytes()) + print(f"Saved to {args.output}") diff --git a/extra/sqtt/roc.py b/extra/sqtt/roc.py index 109156747e..84a90a4996 100644 --- a/extra/sqtt/roc.py +++ b/extra/sqtt/roc.py @@ -60,7 +60,7 @@ class _ROCParseCtx: def __init__(self, dev_evs:dict[str, ProfileDeviceEvent], sqtt_evs:list[ProfileSQTTEvent], prog_evs:list[ProfileProgramEvent]): self.dev_evs, self.sqtt_evs, self.prog_evs = dev_evs, iter(sqtt_evs), prog_evs self.wave_events:dict[PrgExec, dict[int, InstInfo]] = {} - self.disasms:dict[int, tuple[str, int]] = {} + self.disasms:dict[tuple[str, int], tuple[str, int]] = {} self.inst_execs:dict[PrgExec, list[InstExec]] = {} for prog in prog_evs: @@ -85,13 +85,13 @@ class _ROCParseCtx: for j in range(ev.instructions_size): inst_ev = ev.instructions_array[j] inst_typ = rocprof.rocprofiler_thread_trace_decoder_inst_category_t__enumvalues[inst_ev.category] - inst_disasm = self.disasms[(self.active_kern, inst_ev.pc.address)][0] + inst_disasm = self.disasms[(unwrap(self.active_kern), unwrap(inst_ev.pc.address))][0] asm.setdefault(inst_ev.pc.address, InstInfo(typ=inst_typ, inst=inst_disasm)) asm[inst_ev.pc.address].on_ev(inst_ev) inst_execs.append(InstExec(inst_typ, inst_disasm, inst_ev.stall, inst_ev.duration, inst_ev.time)) if ev.instructions_size > 0: - self.wave_events[key:=PrgExec(self.active_kern, ev.wave_id, ev.cu, ev.simd)] = asm + self.wave_events[key:=PrgExec(unwrap(self.active_kern), ev.wave_id, ev.cu, ev.simd)] = asm self.inst_execs[key] = inst_execs def decode(profile:list[ProfileEvent]) -> _ROCParseCtx: @@ -125,7 +125,7 @@ def decode(profile:list[ProfileEvent]) -> _ROCParseCtx: @rocprof.rocprof_trace_decoder_isa_callback_t def isa_cb(instr_ptr, mem_size_ptr, size_ptr, pc, data_ptr): - instr, mem_size_ptr[0] = ROCParseCtx.disasms[(ROCParseCtx.active_kern, pc.address)] + instr, mem_size_ptr[0] = ROCParseCtx.disasms[(unwrap(ROCParseCtx.active_kern), pc.address)] # this is the number of bytes to next instruction, set to 0 for end_pgm if instr == "s_endpgm": mem_size_ptr[0] = 0 diff --git a/tinygrad/viz/serve.py b/tinygrad/viz/serve.py index fab868732f..fc35a00d0b 100755 --- a/tinygrad/viz/serve.py +++ b/tinygrad/viz/serve.py @@ -204,20 +204,19 @@ def load_sqtt(profile:list[ProfileEvent]) -> None: return ctxs.append({"name":"Counters", "steps":[step]}) try: from extra.sqtt.roc import decode except Exception: return err("DECODER IMPORT ISSUE") - try: - rctx = decode(profile) - steps:list[dict] = [] - for k,v in rctx.inst_execs.items(): - if k.wave == 0: - if (r:=ref_map.get(name:=k.name)): name = ctxs[r]["name"] - steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters", - "data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}}) - rows = [(e.inst, e.time, e.time-v[i-1].time if i else 0, e.dur, e.stall, str(e.typ).split("_")[-1]) for i,e in enumerate(v)] - summary = [{"label":"Total Cycles", "value":v[-1].time-v[0].time if v else 0}, {"label":"CU", "value":k.cu}, {"label":"SIMD", "value":k.simd}] - steps.append({"name":f"Wave {k.wave}", "depth":1, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters", - "data":{"rows":rows, "cols":["Instruction", "Clk", "Wait", "Duration", "Stall", "Type"], "summary":summary}}) - if not steps: return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded") + try: rctx = decode(profile) except Exception: return err("DECODER ERROR") + if not rctx.inst_execs: return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded") + steps:list[dict] = [] + for k,v in rctx.inst_execs.items(): + if k.wave == 0: + if (r:=ref_map.get(name:=k.name)): name = ctxs[r]["name"] + steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters", + "data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}}) + rows = [(e.inst, e.time, e.time-v[i-1].time if i else 0, e.dur, e.stall, str(e.typ).split("_")[-1]) for i,e in enumerate(v)] + summary = [{"label":"Total Cycles", "value":v[-1].time-v[0].time if v else 0}, {"label":"CU", "value":k.cu}, {"label":"SIMD", "value":k.simd}] + steps.append({"name":f"Wave {k.wave}", "depth":1, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters", + "data":{"rows":rows, "cols":["Instruction", "Clk", "Wait", "Duration", "Stall", "Type"], "summary":summary}}) ctxs.append({"name":"Counters", "steps":steps}) def get_profile(profile:list[ProfileEvent]) -> bytes|None: