From d3eef7016206eb32041b5b3fe8d3aae7ad5688eb Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Wed, 11 Mar 2026 18:32:49 +0200 Subject: [PATCH] viz: render shader clock frequency graph (#15197) --- extra/viz/cli.py | 11 ++++++++--- test/amd/test_sqttmap.py | 9 ++++++++- tinygrad/viz/js/index.js | 31 ++++++++++++++++++------------- tinygrad/viz/serve.py | 27 ++++++++++++++++++++------- 4 files changed, 54 insertions(+), 24 deletions(-) diff --git a/extra/viz/cli.py b/extra/viz/cli.py index e6dfe04d99..d82e21cb66 100755 --- a/extra/viz/cli.py +++ b/extra/viz/cli.py @@ -48,11 +48,16 @@ def decode_profile(data:bytes) -> dict: name, ref, key, st, dur, fmt = u("= 2: print(f"{name:20s} SE:{event.se} {mean/1e9:.2f} GHz mean, {variance/1e18:.2f} GHz^2 variance") + events = [e for e in timeline if type(e).__name__ == "ProfileRangeEvent"] insts, execs = 0, 0 for e in events: if "EXEC" in e.device: diff --git a/tinygrad/viz/js/index.js b/tinygrad/viz/js/index.js index 05730691b9..2a0613a86c 100644 --- a/tinygrad/viz/js/index.js +++ b/tinygrad/viz/js/index.js @@ -322,6 +322,7 @@ function setFocus(key) { } const EventTypes = { EXEC:0, BUF:1 }; +const GraphConfig = [{ pcolor:"#c9a8ff", unit:"B", fillColor:"#2B1B72"}, { pcolor:"#4fa3cc", unit:"Hz", fillColor:"#4fa3cc"}]; async function renderProfiler(path, opts) { displaySelection("#profiler"); @@ -425,13 +426,15 @@ async function renderProfiler(path, opts) { } div.style("height", levelHeight*levels.length+padding+"px").style("pointerEvents", "none"); } else { - const peak = u64(); + const linear = u8(), peak = u64(); + const config = GraphConfig[linear]; const timestamps = [], valueMap = new Map(); // start by unpacking the raw events const memEvents = []; let x = 0, y = 0, shapeIdx = 0; const allocs = new Map(); for (let j=0; j 0) data.first = data.first == null ? timestamps[0] : Math.min(data.first, timestamps[0]); - data.tracks.set(k, { shapes:[sum], eventType, visible, offsetY, pcolor:"#c9a8ff", height, peak, scaleFactor:maxheight*4/height, - get views() { return [[sum], buildBufShapes()]; }, valueMap, rowBorderColor }); + data.tracks.set(k, { shapes:[sum], eventType, linear, visible, offsetY, pcolor:config.pcolor, height, peak, scaleFactor:maxheight*4/height, + get views() { return [[sum], linear ? null : buildBufShapes()]; }, valueMap, rowBorderColor }); div.style("height", height+padding+"px").style("cursor", "pointer").on("click", (e) => { + if (linear) return; const newFocus = e.currentTarget.id === focusedDevice ? null : e.currentTarget.id; let offset = 0; for (const [tid, track] of data.tracks) { @@ -502,7 +506,7 @@ async function renderProfiler(path, opts) { if (tid === newFocus) { track.shapes = track.views[1]; offset += rescaleTrack(track, tid, track.scaleFactor); } else if (tid === focusedDevice) { track.shapes = track.views[0]; offset += rescaleTrack(track, tid, 1/track.scaleFactor); } } - data.axes.y = newFocus != null ? { domain:[0, (t=data.tracks.get(newFocus)).peak], range:[t.offsetY+t.height, t.offsetY], fmt:"B" } : null; + data.axes.y = newFocus != null ? { domain:[0, (t=data.tracks.get(newFocus)).peak], range:[t.offsetY+t.height, t.offsetY], fmt:config.unit } : null; toggleCls(document.getElementById(focusedDevice), document.getElementById(newFocus), "expanded"); focusedDevice = newFocus; return resize(); @@ -541,26 +545,27 @@ async function renderProfiler(path, opts) { const visibleYStart = profilerEl.scrollTop-canvasTop + rect(profilerEl).top, visibleYEnd = visibleYStart+profilerEl.clientHeight; ctx.textBaseline = "middle"; // draw shapes - for (const [k, { shapes, eventType, visible, offsetY, valueMap, pcolor, scolor, rowBorderColor }] of data.tracks) { + for (const [k, { shapes, eventType, linear, visible, offsetY, valueMap, pcolor, scolor, rowBorderColor }] of data.tracks) { visible.length = 0; const trackHeight = rect(document.getElementById(k)).height; if (offsetY+trackHeight < visibleYStart || offsetY > visibleYEnd) continue; const addBorder = scolor != null ? (w) => { if (w > 10) { ctx.strokeStyle = scolor; ctx.stroke(); } } : null; + const config = GraphConfig[linear]; for (const e of shapes) { if (eventType === EventTypes.BUF) { // generic polygon if (e.x[0]>et || e.x.at(-1)=0; i--) ctx.lineTo(x[i], offsetY+e.y1[i]); - ctx.closePath(); - ctx.fillStyle = e.fillColor; ctx.fill(); + if (linear) { ctx.strokeStyle = e.fillColor; ctx.lineWidth = 2; ctx.stroke(); ctx.lineWidth = 1; } + // walk the path back and fill the complete shape + else { for (let i=x.length-1; i>=0; i--) ctx.lineTo(x[i], offsetY+e.y0[i]); ctx.closePath(); ctx.fillStyle = e.fillColor; ctx.fill(); } } else { // contiguous rect if (e.x>et || e.x+e.width bytes|None: +def graph_layout(k:str, dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int, end_ts:int, peaks:list[int], dtype_size:dict[str, int], + scache:dict[str, int]) -> tuple[str, bytes|None]: + if k.startswith("LINE:"): + xy = [(rel_ts(e.ts, start_ts), e.key) for st,_,_,e in dev_events if isinstance(e, ProfilePointEvent)] + peaks.append(peak:=max([y for _,y in xy])) + return k.replace("LINE:", ""), struct.pack(" tuple[tuple[int, int], ...]: - return tuple((ord(ss[0][0]), int(ss[1])) if len(ss:=x.split(":"))>1 else (999,999) for x in row.split()) + return ((0, 0),) if "Clock" in row else tuple((ord(ss[0][0]), int(ss[1])) if len(ss:=x.split(":"))>1 else (999,999) for x in row.split()) # *** Performance counters @@ -336,9 +339,11 @@ def load_amd_counters(ctxs:list[dict], profile:list[ProfileEvent]) -> None: def sqtt_timeline(data:bytes, lib:bytes, target:str) -> list[ProfileEvent]: from tinygrad.renderer.amd.sqtt import map_insts, InstructionInfo, PacketType, INST, InstOp, VALUINST, IMMEDIATE, IMMEDIATE_MASK, VMEMEXEC, ALUEXEC - from tinygrad.renderer.amd.sqtt import INST_RDNA4, InstOpRDNA4 + from tinygrad.renderer.amd.sqtt import INST_RDNA4, InstOpRDNA4, TS_DELTA_OR_MARK, TS_DELTA_OR_MARK_RDNA4 ret:list[ProfileEvent] = [] row_ends:dict[str, Decimal] = {} + NS_PER_TICK = 10 # 100MHz + prev_pair:tuple[int, int]|None = None # (shader, realtime) def add(name:str, p:PacketType, width=1, op:str|None=None, wave:int|None=None, info:InstructionInfo|None=None) -> None: row = f"WAVE:{wave}" if (wave:=getattr(p, "wave", wave)) is not None else f"{p.__class__.__name__}:0 {name}" ret.append(e:=ProfileRangeEvent(row, TracingKey(op or name, ret=f"PC:{info.pc}" if info else None), Decimal(p._time), Decimal(p._time+width))) @@ -346,6 +351,14 @@ def sqtt_timeline(data:bytes, lib:bytes, target:str) -> list[ProfileEvent]: row_ends[row] = unwrap(e.en) for p, info in map_insts(data, lib, target): if len(ret) > getenv("MAX_SQTT_PKTS", 50_000): break + if isinstance(p, (TS_DELTA_OR_MARK, TS_DELTA_OR_MARK_RDNA4)) and p.is_marker: + pair = (p._time, p.delta) + if prev_pair is None: prev_pair = pair + elif ret: + (s0, r0), (s1, r1) = prev_pair, pair + freq_hz = (s1 - s0) * 1_000_000_000 // ((r1 - r0) * NS_PER_TICK) + ret.append(ProfilePointEvent("LINE:Shader Clock", "freq_hz", freq_hz, ts=Decimal(p._time))) + prev_pair = pair if isinstance(p, (INST, INST_RDNA4)): name = p.op.name if isinstance(p.op, (InstOp, InstOpRDNA4)) else f"0x{p.op:02x}" add(name, p, width=10 if "BARRIER" in name else 1, info=info) @@ -431,7 +444,7 @@ def get_profile(profile:list[ProfileEvent], sort_fn:Callable[[str], Any]=device_ for k,v in dev_events.items(): v.sort(key=lambda e:e[0]) layout[k] = timeline_layout(v, start_ts, scache) - layout[f"{k} Memory"] = mem_layout(v, start_ts, unwrap(end_ts), peaks, dtype_size, scache) + layout.update([graph_layout(k, v, start_ts, unwrap(end_ts), peaks, dtype_size, scache)]) sorted_layout = sorted([k for k,v in layout.items() if v is not None], key=sort_fn) ret = [b"".join([struct.pack("