viz: render shader clock frequency graph (#15197)

This commit is contained in:
qazal
2026-03-11 18:32:49 +02:00
committed by GitHub
parent 39b0f4bcc1
commit d3eef70162
4 changed files with 54 additions and 24 deletions

View File

@@ -48,11 +48,16 @@ def decode_profile(data:bytes) -> dict:
name, ref, key, st, dur, fmt = u("<IIIIfI")
v["events"].append({"name":strings[name], "ref":option(ref), "key":option(key), "st":st, "dur":dur, "fmt":strings[fmt]})
else:
v["linear"] = u("<B")[0]
v["peak"] = u("<Q")[0]
for _ in range(event_count):
alloc, ts, key = u("<BII")
if alloc: v["events"].append({"event":"alloc", "ts":ts, "key":key, "arg": {"dtype":strings[u("<I")[0]], "sz":u("<Q")[0]}})
else: v["events"].append({"event":"free", "ts":ts, "key":key, "arg": {"users":[u("<IIIB") for _ in range(u("<I")[0])]}})
if v["linear"]:
ts, value = u("<IQ")
v["events"].append({"event":"freq", "ts":ts, "value":value})
else:
alloc, ts, key = u("<BII")
if alloc: v["events"].append({"event":"alloc", "ts":ts, "key":key, "arg": {"dtype":strings[u("<I")[0]], "sz":u("<Q")[0]}})
else: v["events"].append({"event":"free", "ts":ts, "key":key, "arg": {"users":[u("<IIIB") for _ in range(u("<I")[0])]}})
return {"dur":total_dur, "peak":global_peak, "layout":layout, "markers":markers}
if __name__ == "__main__":

View File

@@ -78,7 +78,14 @@ class TestSQTTMapBase(unittest.TestCase):
for event in events:
if (p:=kern_events.get(event.kern)) is None: continue
with self.subTest(example=name, kern=event.kern):
events = [e for e in sqtt_timeline(event.blob, p.lib, target) if type(e).__name__ == "ProfileRangeEvent"]
if not (timeline:=sqtt_timeline(event.blob, p.lib, target)): continue
frequency = [e.key for e in timeline if type(e).__name__ == "ProfilePointEvent" and e.name == "freq_hz"]
mean = sum(frequency) / len(frequency)
variance = sum((v - mean) ** 2 for v in frequency) / len(frequency)
self.assertGreater(mean, 0)
self.assertGreater(variance, 0)
if DEBUG >= 2: print(f"{name:20s} SE:{event.se} {mean/1e9:.2f} GHz mean, {variance/1e18:.2f} GHz^2 variance")
events = [e for e in timeline if type(e).__name__ == "ProfileRangeEvent"]
insts, execs = 0, 0
for e in events:
if "EXEC" in e.device:

View File

@@ -322,6 +322,7 @@ function setFocus(key) {
}
// Track shape kinds: the canvas draw loop treats BUF tracks as generic polygons
// and every other track as contiguous rects.
const EventTypes = { EXEC:0, BUF:1 };
// Per-graph presentation settings, indexed by a track's `linear` flag (0 or 1).
// `pcolor` becomes the track's pcolor, `fillColor` colors the summed polygon, and
// `unit` is used for the y-axis format and for tooltip values via formatUnit.
// Entry 0 uses unit "B" and entry 1 uses unit "Hz".
const GraphConfig = [{ pcolor:"#c9a8ff", unit:"B", fillColor:"#2B1B72"}, { pcolor:"#4fa3cc", unit:"Hz", fillColor:"#4fa3cc"}];
async function renderProfiler(path, opts) {
displaySelection("#profiler");
@@ -425,13 +426,15 @@ async function renderProfiler(path, opts) {
}
div.style("height", levelHeight*levels.length+padding+"px").style("pointerEvents", "none");
} else {
const peak = u64();
const linear = u8(), peak = u64();
const config = GraphConfig[linear];
const timestamps = [], valueMap = new Map();
// start by unpacking the raw events
const memEvents = [];
let x = 0, y = 0, shapeIdx = 0;
const allocs = new Map();
for (let j=0; j<eventsLen; j++) {
if (linear) { const ts = u32(), value = u64(); timestamps.push(ts); valueMap.set(ts, value); continue; }
const alloc = u8(), ts = u32(), key = u32();
if (alloc) {
const dtype = strings[u32()], sz = u64(), nbytes = dtypeSize[dtype]*sz;
@@ -449,11 +452,11 @@ async function renderProfiler(path, opts) {
}
}
timestamps.push(dur);
const height = heightScale(peak);
const height = linear ? (baseHeight-padding)*(opts.heightScale ?? 1)*2 : heightScale(peak);
const yscale = d3.scaleLinear().domain([0, peak]).range([height, 0]);
// generic polygon merger
const base0 = yscale(0);
const sum = {x:[], y0:[], y1:[], fillColor:"#2B1B72"};
const sum = {x:[], y0:[], y1:[], fillColor:config.fillColor};
for (let i=0; i<timestamps.length-1; i++) {
const yv = yscale(valueMap.get(timestamps[i]));
sum.x.push(timestamps[i], timestamps[i+1]); sum.y1.push(yv, yv); sum.y0.push(base0, base0);
@@ -492,9 +495,10 @@ async function renderProfiler(path, opts) {
return bufShapes;
};
if (timestamps.length > 0) data.first = data.first == null ? timestamps[0] : Math.min(data.first, timestamps[0]);
data.tracks.set(k, { shapes:[sum], eventType, visible, offsetY, pcolor:"#c9a8ff", height, peak, scaleFactor:maxheight*4/height,
get views() { return [[sum], buildBufShapes()]; }, valueMap, rowBorderColor });
data.tracks.set(k, { shapes:[sum], eventType, linear, visible, offsetY, pcolor:config.pcolor, height, peak, scaleFactor:maxheight*4/height,
get views() { return [[sum], linear ? null : buildBufShapes()]; }, valueMap, rowBorderColor });
div.style("height", height+padding+"px").style("cursor", "pointer").on("click", (e) => {
if (linear) return;
const newFocus = e.currentTarget.id === focusedDevice ? null : e.currentTarget.id;
let offset = 0;
for (const [tid, track] of data.tracks) {
@@ -502,7 +506,7 @@ async function renderProfiler(path, opts) {
if (tid === newFocus) { track.shapes = track.views[1]; offset += rescaleTrack(track, tid, track.scaleFactor); }
else if (tid === focusedDevice) { track.shapes = track.views[0]; offset += rescaleTrack(track, tid, 1/track.scaleFactor); }
}
data.axes.y = newFocus != null ? { domain:[0, (t=data.tracks.get(newFocus)).peak], range:[t.offsetY+t.height, t.offsetY], fmt:"B" } : null;
data.axes.y = newFocus != null ? { domain:[0, (t=data.tracks.get(newFocus)).peak], range:[t.offsetY+t.height, t.offsetY], fmt:config.unit } : null;
toggleCls(document.getElementById(focusedDevice), document.getElementById(newFocus), "expanded");
focusedDevice = newFocus;
return resize();
@@ -541,26 +545,27 @@ async function renderProfiler(path, opts) {
const visibleYStart = profilerEl.scrollTop-canvasTop + rect(profilerEl).top, visibleYEnd = visibleYStart+profilerEl.clientHeight;
ctx.textBaseline = "middle";
// draw shapes
for (const [k, { shapes, eventType, visible, offsetY, valueMap, pcolor, scolor, rowBorderColor }] of data.tracks) {
for (const [k, { shapes, eventType, linear, visible, offsetY, valueMap, pcolor, scolor, rowBorderColor }] of data.tracks) {
visible.length = 0;
const trackHeight = rect(document.getElementById(k)).height;
if (offsetY+trackHeight < visibleYStart || offsetY > visibleYEnd) continue;
const addBorder = scolor != null ? (w) => { if (w > 10) { ctx.strokeStyle = scolor; ctx.stroke(); } } : null;
const config = GraphConfig[linear];
for (const e of shapes) {
if (eventType === EventTypes.BUF) { // generic polygon
if (e.x[0]>et || e.x.at(-1)<st) continue;
ctx.beginPath();
const x = e.x.map(xscale);
ctx.moveTo(x[0], offsetY+e.y0[0]);
ctx.moveTo(x[0], offsetY+e.y1[0]);
for (let i=1; i<x.length; i++) {
ctx.lineTo(x[i], offsetY+e.y0[i]);
ctx.lineTo(x[i], offsetY+e.y1[i]);
let arg = e.arg;
if (arg == null && valueMap != null) arg = {tooltipText: `Total: ${formatUnit(valueMap.get(e.x[i-1]), 'B')}`}
if (arg == null && valueMap != null) arg = {tooltipText: formatUnit(valueMap.get(e.x[i-1]), config.unit)}
visible.push({ x0:x[i-1], x1:x[i], y0:offsetY+e.y1[i-1], y1:offsetY+e.y0[i], arg });
}
for (let i=x.length-1; i>=0; i--) ctx.lineTo(x[i], offsetY+e.y1[i]);
ctx.closePath();
ctx.fillStyle = e.fillColor; ctx.fill();
if (linear) { ctx.strokeStyle = e.fillColor; ctx.lineWidth = 2; ctx.stroke(); ctx.lineWidth = 1; }
// walk the path back and fill the complete shape
else { for (let i=x.length-1; i>=0; i--) ctx.lineTo(x[i], offsetY+e.y0[i]); ctx.closePath(); ctx.fillStyle = e.fillColor; ctx.fill(); }
} else { // contiguous rect
if (e.x>et || e.x+e.width<st) continue;
const x = xscale(e.x);

View File

@@ -239,13 +239,16 @@ def encode_mem_free(key:int, ts:int, execs:list[ProfilePointEvent], scache:dict)
ei_encoding.append((e.key, enum_str(e.arg["name"], scache), num, mode))
return struct.pack("<BIII", 0, ts, key, len(ei_encoding))+b"".join(struct.pack("<IIIB", *t) for t in ei_encoding)
def mem_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int, end_ts:int, peaks:list[int], dtype_size:dict[str, int],
scache:dict[str, int]) -> bytes|None:
def graph_layout(k:str, dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int, end_ts:int, peaks:list[int], dtype_size:dict[str, int],
scache:dict[str, int]) -> tuple[str, bytes|None]:
if k.startswith("LINE:"):
xy = [(rel_ts(e.ts, start_ts), e.key) for st,_,_,e in dev_events if isinstance(e, ProfilePointEvent)]
peaks.append(peak:=max([y for _,y in xy]))
return k.replace("LINE:", ""), struct.pack("<BIBQ", 1, len(xy), 1, peak)+b"".join(struct.pack("<IQ", x, y) for x,y in xy)
peak, mem = 0, 0
temp:dict[int, int] = {}
events:list[bytes] = []
buf_ei:dict[int, list[ProfilePointEvent]] = {}
for st,_,_,e in dev_events:
if not isinstance(e, ProfilePointEvent): continue
if e.name == "alloc":
@@ -262,7 +265,7 @@ def mem_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int,
mem -= temp.pop(e.key)
for t in temp: events.append(encode_mem_free(t, rel_ts(end_ts, start_ts), buf_ei.pop(t, []), scache))
peaks.append(peak)
return struct.pack("<BIQ", 1, len(events), peak)+b"".join(events) if events else None
return f"{k} Memory", struct.pack("<BIBQ", 1, len(events), 0, peak)+b"".join(events) if events else None
# by default, VIZ does not start when there is an error
# use this to instead display the traceback to the user
@@ -272,7 +275,7 @@ def soft_err(fn:Callable):
except Exception: fn({"src":traceback.format_exc()})
def row_tuple(row:str) -> tuple[tuple[int, int], ...]:
  """Convert a row name into a tuple of (int, int) pairs.

  Any row whose name contains "Clock" maps to the single pair ((0, 0),), which compares
  lower than every pair built from a non-empty "NAME:N" token.
  Otherwise each whitespace-separated token yields one pair:
    - "NAME:N" -> (ord(first character of NAME), int(N))
    - a token without ":" -> (999, 999)

  NOTE(review): presumably this is used as a row sort key so clock rows are listed first;
  confirm against the callers, which are not visible here.
  """
  # a single return: an earlier unconditional return would make the "Clock" case unreachable
  if "Clock" in row: return ((0, 0),)
  return tuple((ord(ss[0][0]), int(ss[1])) if len(ss:=x.split(":"))>1 else (999,999) for x in row.split())
# *** Performance counters
@@ -336,9 +339,11 @@ def load_amd_counters(ctxs:list[dict], profile:list[ProfileEvent]) -> None:
def sqtt_timeline(data:bytes, lib:bytes, target:str) -> list[ProfileEvent]:
from tinygrad.renderer.amd.sqtt import map_insts, InstructionInfo, PacketType, INST, InstOp, VALUINST, IMMEDIATE, IMMEDIATE_MASK, VMEMEXEC, ALUEXEC
from tinygrad.renderer.amd.sqtt import INST_RDNA4, InstOpRDNA4
from tinygrad.renderer.amd.sqtt import INST_RDNA4, InstOpRDNA4, TS_DELTA_OR_MARK, TS_DELTA_OR_MARK_RDNA4
ret:list[ProfileEvent] = []
row_ends:dict[str, Decimal] = {}
NS_PER_TICK = 10 # 100MHz
prev_pair:tuple[int, int]|None = None # (shader, realtime)
def add(name:str, p:PacketType, width=1, op:str|None=None, wave:int|None=None, info:InstructionInfo|None=None) -> None:
row = f"WAVE:{wave}" if (wave:=getattr(p, "wave", wave)) is not None else f"{p.__class__.__name__}:0 {name}"
ret.append(e:=ProfileRangeEvent(row, TracingKey(op or name, ret=f"PC:{info.pc}" if info else None), Decimal(p._time), Decimal(p._time+width)))
@@ -346,6 +351,14 @@ def sqtt_timeline(data:bytes, lib:bytes, target:str) -> list[ProfileEvent]:
row_ends[row] = unwrap(e.en)
for p, info in map_insts(data, lib, target):
if len(ret) > getenv("MAX_SQTT_PKTS", 50_000): break
if isinstance(p, (TS_DELTA_OR_MARK, TS_DELTA_OR_MARK_RDNA4)) and p.is_marker:
pair = (p._time, p.delta)
if prev_pair is None: prev_pair = pair
elif ret:
(s0, r0), (s1, r1) = prev_pair, pair
freq_hz = (s1 - s0) * 1_000_000_000 // ((r1 - r0) * NS_PER_TICK)
ret.append(ProfilePointEvent("LINE:Shader Clock", "freq_hz", freq_hz, ts=Decimal(p._time)))
prev_pair = pair
if isinstance(p, (INST, INST_RDNA4)):
name = p.op.name if isinstance(p.op, (InstOp, InstOpRDNA4)) else f"0x{p.op:02x}"
add(name, p, width=10 if "BARRIER" in name else 1, info=info)
@@ -431,7 +444,7 @@ def get_profile(profile:list[ProfileEvent], sort_fn:Callable[[str], Any]=device_
for k,v in dev_events.items():
v.sort(key=lambda e:e[0])
layout[k] = timeline_layout(v, start_ts, scache)
layout[f"{k} Memory"] = mem_layout(v, start_ts, unwrap(end_ts), peaks, dtype_size, scache)
layout.update([graph_layout(k, v, start_ts, unwrap(end_ts), peaks, dtype_size, scache)])
sorted_layout = sorted([k for k,v in layout.items() if v is not None], key=sort_fn)
ret = [b"".join([struct.pack("<B", len(k)), k.encode(), unwrap(layout[k])]) for k in sorted_layout]
index = json.dumps({"strings":list(scache), "dtypeSize":dtype_size, "markers":[{"ts":rel_ts(e.ts, start_ts), **e.arg} for e in markers],