From af17e072512ee34fe102cc1e83fb641ff72d0416 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Wed, 12 Nov 2025 22:40:37 +0800 Subject: [PATCH] viz: sqtt touchups (#13228) * viz: sqtt touchups * revert * matches --- extra/sqtt/roc.py | 6 ++++-- tinygrad/viz/serve.py | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/extra/sqtt/roc.py b/extra/sqtt/roc.py index 6ed6c3e7a9..531554c1b1 100644 --- a/extra/sqtt/roc.py +++ b/extra/sqtt/roc.py @@ -35,7 +35,7 @@ class InstInfo: hit:int=0 lat:int=0 stall:int=0 - def __str__(self): return f"{self.inst:>20} hits:{self.typ:>6} hits:{self.hit:>6} latency:{self.lat:>6} stall:{self.stall:>6}" + def __str__(self): return f"{self.inst:>20} type:{self.typ:>6} hits:{self.hit:>6} latency:{self.lat:>6} stall:{self.stall:>6}" def on_ev(self, ev): self.hit, self.lat, self.stall = self.hit + 1, self.lat + ev.duration, self.stall + ev.stall @@ -61,6 +61,8 @@ class WaveExec: wave_id:int cu:int simd:int + begin_time:int + end_time:int insts:list[InstExec] class _ROCParseCtx: @@ -99,7 +101,7 @@ class _ROCParseCtx: if ev.instructions_size > 0: self.wave_events[key:=PrgExec(unwrap(self.active_kern), ev.wave_id, ev.cu, ev.simd)] = asm - self.inst_execs.setdefault(key.name, []).append(WaveExec(ev.wave_id, ev.cu, ev.simd, inst_execs)) + self.inst_execs.setdefault(key.name, []).append(WaveExec(ev.wave_id, ev.cu, ev.simd, ev.begin_time, ev.end_time, inst_execs)) def decode(profile:list[ProfileEvent]) -> _ROCParseCtx: dev_events:dict[str, ProfileDeviceEvent] = {} diff --git a/tinygrad/viz/serve.py b/tinygrad/viz/serve.py index 952928699f..b0366dfa4c 100755 --- a/tinygrad/viz/serve.py +++ b/tinygrad/viz/serve.py @@ -217,12 +217,23 @@ def load_sqtt(profile:list[ProfileEvent]) -> None: if (r:=ref_map.get(name)): name = ctxs[r]["name"] steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters", "data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}}) + + # Idle: The total time gap between the completion of previous instruction and the beginning of the current instruction. + # The idle time can be caused by: + # * Arbiter loss + # * Source or destination register dependency + # * Instruction cache miss + # Stall: The total number of cycles the hardware pipe couldn't issue an instruction. + # Duration: Total latency in cycles, defined as "Stall time + Issue time" for gfx9 or "Stall time + Execute time" for gfx10+. for w in waves: - rows = [(e.inst, e.time, e.time-(w.insts[i-1].time if i else 0), e.dur, e.stall, str(e.typ).split("_")[-1]) for i,e in enumerate(w.insts)] - summary = [{"label":"Total Cycles", "value":w.insts[-1].time-w.insts[0].time if w.insts else 0}, {"label":"CU", "value":w.cu}, + rows, prev_instr = [], w.begin_time + for i,e in enumerate(w.insts): + rows.append((e.inst, e.time, max(0, e.time-prev_instr), e.dur, e.stall, str(e.typ).split("_")[-1])) + prev_instr = max(prev_instr, e.time + e.dur) + summary = [{"label":"Total Cycles", "value":w.end_time-w.begin_time}, {"label":"CU", "value":w.cu}, {"label":"SIMD", "value":w.simd}] steps.append({"name":f"Wave {w.wave_id}", "depth":1, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters", - "data":{"rows":rows, "cols":["Instruction", "Clk", "Wait", "Duration", "Stall", "Type"], "summary":summary}}) + "data":{"rows":rows, "cols":["Instruction", "Clk", "Idle", "Duration", "Stall", "Type"], "summary":summary}}) ctxs.append({"name":"Counters", "steps":steps}) def get_profile(profile:list[ProfileEvent]) -> bytes|None: