mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
cleanup sqtt tooling (#13188)
* cleanup viz/serve.py
* use latest profile in rgptool.py
* unwrap nullable in roc.py, fix disasms typing
This commit is contained in:
@@ -4,7 +4,7 @@ import argparse, ctypes, struct, hashlib, pickle, code, typing, functools
|
||||
import tinygrad.runtime.autogen.sqtt as sqtt
|
||||
from tinygrad.device import ProfileEvent, ProfileDeviceEvent, ProfileProgramEvent
|
||||
from tinygrad.runtime.ops_amd import ProfileSQTTEvent
|
||||
from tinygrad.helpers import round_up, flatten, all_same
|
||||
from tinygrad.helpers import round_up, flatten, all_same, temp
|
||||
from dataclasses import dataclass
|
||||
|
||||
CHUNK_CLASSES = {
|
||||
@@ -210,7 +210,7 @@ class RGP:
|
||||
flags=0,
|
||||
trace_shader_core_clock=0x93f05080,
|
||||
trace_memory_clock=0x4a723a40,
|
||||
device_id={110000: 0x744c, 110003: 0x7480, 120001: 0x7550}[device_props['gfx_target_version']],
|
||||
device_id={110000: 0x744c, 110003: 0x7480, 120001: 0x7550, 120000: 0x7550}[device_props['gfx_target_version']],
|
||||
device_revision_id=0xc8,
|
||||
vgprs_per_simd=1536,
|
||||
sgprs_per_simd=128*16,
|
||||
@@ -324,7 +324,7 @@ class RGP:
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(prog='rgptool', description='A tool to create (from pickled tinygrad profile), inspect and modify Radeon GPU Profiler files')
|
||||
parser.add_argument('command')
|
||||
parser.add_argument('input')
|
||||
parser.add_argument('input', nargs='?', default=temp("profile.pkl", append_user=True))
|
||||
parser.add_argument('-d', '--device')
|
||||
parser.add_argument('-o', '--output')
|
||||
args = parser.parse_args()
|
||||
@@ -346,3 +346,4 @@ if __name__ == '__main__':
|
||||
|
||||
if args.output is not None:
|
||||
with open(args.output, 'wb+') as fd: fd.write(rgp.to_bytes())
|
||||
print(f"Saved to {args.output}")
|
||||
|
||||
@@ -60,7 +60,7 @@ class _ROCParseCtx:
|
||||
def __init__(self, dev_evs:dict[str, ProfileDeviceEvent], sqtt_evs:list[ProfileSQTTEvent], prog_evs:list[ProfileProgramEvent]):
|
||||
self.dev_evs, self.sqtt_evs, self.prog_evs = dev_evs, iter(sqtt_evs), prog_evs
|
||||
self.wave_events:dict[PrgExec, dict[int, InstInfo]] = {}
|
||||
self.disasms:dict[int, tuple[str, int]] = {}
|
||||
self.disasms:dict[tuple[str, int], tuple[str, int]] = {}
|
||||
self.inst_execs:dict[PrgExec, list[InstExec]] = {}
|
||||
|
||||
for prog in prog_evs:
|
||||
@@ -85,13 +85,13 @@ class _ROCParseCtx:
|
||||
for j in range(ev.instructions_size):
|
||||
inst_ev = ev.instructions_array[j]
|
||||
inst_typ = rocprof.rocprofiler_thread_trace_decoder_inst_category_t__enumvalues[inst_ev.category]
|
||||
inst_disasm = self.disasms[(self.active_kern, inst_ev.pc.address)][0]
|
||||
inst_disasm = self.disasms[(unwrap(self.active_kern), unwrap(inst_ev.pc.address))][0]
|
||||
asm.setdefault(inst_ev.pc.address, InstInfo(typ=inst_typ, inst=inst_disasm))
|
||||
asm[inst_ev.pc.address].on_ev(inst_ev)
|
||||
inst_execs.append(InstExec(inst_typ, inst_disasm, inst_ev.stall, inst_ev.duration, inst_ev.time))
|
||||
|
||||
if ev.instructions_size > 0:
|
||||
self.wave_events[key:=PrgExec(self.active_kern, ev.wave_id, ev.cu, ev.simd)] = asm
|
||||
self.wave_events[key:=PrgExec(unwrap(self.active_kern), ev.wave_id, ev.cu, ev.simd)] = asm
|
||||
self.inst_execs[key] = inst_execs
|
||||
|
||||
def decode(profile:list[ProfileEvent]) -> _ROCParseCtx:
|
||||
@@ -125,7 +125,7 @@ def decode(profile:list[ProfileEvent]) -> _ROCParseCtx:
|
||||
|
||||
@rocprof.rocprof_trace_decoder_isa_callback_t
|
||||
def isa_cb(instr_ptr, mem_size_ptr, size_ptr, pc, data_ptr):
|
||||
instr, mem_size_ptr[0] = ROCParseCtx.disasms[(ROCParseCtx.active_kern, pc.address)]
|
||||
instr, mem_size_ptr[0] = ROCParseCtx.disasms[(unwrap(ROCParseCtx.active_kern), pc.address)]
|
||||
|
||||
# this is the number of bytes to next instruction, set to 0 for end_pgm
|
||||
if instr == "s_endpgm": mem_size_ptr[0] = 0
|
||||
|
||||
@@ -204,20 +204,19 @@ def load_sqtt(profile:list[ProfileEvent]) -> None:
|
||||
return ctxs.append({"name":"Counters", "steps":[step]})
|
||||
try: from extra.sqtt.roc import decode
|
||||
except Exception: return err("DECODER IMPORT ISSUE")
|
||||
try:
|
||||
rctx = decode(profile)
|
||||
steps:list[dict] = []
|
||||
for k,v in rctx.inst_execs.items():
|
||||
if k.wave == 0:
|
||||
if (r:=ref_map.get(name:=k.name)): name = ctxs[r]["name"]
|
||||
steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
|
||||
"data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}})
|
||||
rows = [(e.inst, e.time, e.time-v[i-1].time if i else 0, e.dur, e.stall, str(e.typ).split("_")[-1]) for i,e in enumerate(v)]
|
||||
summary = [{"label":"Total Cycles", "value":v[-1].time-v[0].time if v else 0}, {"label":"CU", "value":k.cu}, {"label":"SIMD", "value":k.simd}]
|
||||
steps.append({"name":f"Wave {k.wave}", "depth":1, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
|
||||
"data":{"rows":rows, "cols":["Instruction", "Clk", "Wait", "Duration", "Stall", "Type"], "summary":summary}})
|
||||
if not steps: return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded")
|
||||
try: rctx = decode(profile)
|
||||
except Exception: return err("DECODER ERROR")
|
||||
if not rctx.inst_execs: return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded")
|
||||
steps:list[dict] = []
|
||||
for k,v in rctx.inst_execs.items():
|
||||
if k.wave == 0:
|
||||
if (r:=ref_map.get(name:=k.name)): name = ctxs[r]["name"]
|
||||
steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
|
||||
"data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}})
|
||||
rows = [(e.inst, e.time, e.time-v[i-1].time if i else 0, e.dur, e.stall, str(e.typ).split("_")[-1]) for i,e in enumerate(v)]
|
||||
summary = [{"label":"Total Cycles", "value":v[-1].time-v[0].time if v else 0}, {"label":"CU", "value":k.cu}, {"label":"SIMD", "value":k.simd}]
|
||||
steps.append({"name":f"Wave {k.wave}", "depth":1, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
|
||||
"data":{"rows":rows, "cols":["Instruction", "Clk", "Wait", "Duration", "Stall", "Type"], "summary":summary}})
|
||||
ctxs.append({"name":"Counters", "steps":steps})
|
||||
|
||||
def get_profile(profile:list[ProfileEvent]) -> bytes|None:
|
||||
|
||||
Reference in New Issue
Block a user