mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
viz: render shader clock frequency graph (#15197)
This commit is contained in:
@@ -48,11 +48,16 @@ def decode_profile(data:bytes) -> dict:
|
||||
name, ref, key, st, dur, fmt = u("<IIIIfI")
|
||||
v["events"].append({"name":strings[name], "ref":option(ref), "key":option(key), "st":st, "dur":dur, "fmt":strings[fmt]})
|
||||
else:
|
||||
v["linear"] = u("<B")[0]
|
||||
v["peak"] = u("<Q")[0]
|
||||
for _ in range(event_count):
|
||||
alloc, ts, key = u("<BII")
|
||||
if alloc: v["events"].append({"event":"alloc", "ts":ts, "key":key, "arg": {"dtype":strings[u("<I")[0]], "sz":u("<Q")[0]}})
|
||||
else: v["events"].append({"event":"free", "ts":ts, "key":key, "arg": {"users":[u("<IIIB") for _ in range(u("<I")[0])]}})
|
||||
if v["linear"]:
|
||||
ts, value = u("<IQ")
|
||||
v["events"].append({"event":"freq", "ts":ts, "value":value})
|
||||
else:
|
||||
alloc, ts, key = u("<BII")
|
||||
if alloc: v["events"].append({"event":"alloc", "ts":ts, "key":key, "arg": {"dtype":strings[u("<I")[0]], "sz":u("<Q")[0]}})
|
||||
else: v["events"].append({"event":"free", "ts":ts, "key":key, "arg": {"users":[u("<IIIB") for _ in range(u("<I")[0])]}})
|
||||
return {"dur":total_dur, "peak":global_peak, "layout":layout, "markers":markers}
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -78,7 +78,14 @@ class TestSQTTMapBase(unittest.TestCase):
|
||||
for event in events:
|
||||
if (p:=kern_events.get(event.kern)) is None: continue
|
||||
with self.subTest(example=name, kern=event.kern):
|
||||
events = [e for e in sqtt_timeline(event.blob, p.lib, target) if type(e).__name__ == "ProfileRangeEvent"]
|
||||
if not (timeline:=sqtt_timeline(event.blob, p.lib, target)): continue
|
||||
frequency = [e.key for e in timeline if type(e).__name__ == "ProfilePointEvent" and e.name == "freq_hz"]
|
||||
mean = sum(frequency) / len(frequency)
|
||||
variance = sum((v - mean) ** 2 for v in frequency) / len(frequency)
|
||||
self.assertGreater(mean, 0)
|
||||
self.assertGreater(variance, 0)
|
||||
if DEBUG >= 2: print(f"{name:20s} SE:{event.se} {mean/1e9:.2f} GHz mean, {variance/1e18:.2f} GHz^2 variance")
|
||||
events = [e for e in timeline if type(e).__name__ == "ProfileRangeEvent"]
|
||||
insts, execs = 0, 0
|
||||
for e in events:
|
||||
if "EXEC" in e.device:
|
||||
|
||||
@@ -322,6 +322,7 @@ function setFocus(key) {
|
||||
}
|
||||
|
||||
const EventTypes = { EXEC:0, BUF:1 };
|
||||
const GraphConfig = [{ pcolor:"#c9a8ff", unit:"B", fillColor:"#2B1B72"}, { pcolor:"#4fa3cc", unit:"Hz", fillColor:"#4fa3cc"}];
|
||||
|
||||
async function renderProfiler(path, opts) {
|
||||
displaySelection("#profiler");
|
||||
@@ -425,13 +426,15 @@ async function renderProfiler(path, opts) {
|
||||
}
|
||||
div.style("height", levelHeight*levels.length+padding+"px").style("pointerEvents", "none");
|
||||
} else {
|
||||
const peak = u64();
|
||||
const linear = u8(), peak = u64();
|
||||
const config = GraphConfig[linear];
|
||||
const timestamps = [], valueMap = new Map();
|
||||
// start by unpacking the raw events
|
||||
const memEvents = [];
|
||||
let x = 0, y = 0, shapeIdx = 0;
|
||||
const allocs = new Map();
|
||||
for (let j=0; j<eventsLen; j++) {
|
||||
if (linear) { const ts = u32(), value = u64(); timestamps.push(ts); valueMap.set(ts, value); continue; }
|
||||
const alloc = u8(), ts = u32(), key = u32();
|
||||
if (alloc) {
|
||||
const dtype = strings[u32()], sz = u64(), nbytes = dtypeSize[dtype]*sz;
|
||||
@@ -449,11 +452,11 @@ async function renderProfiler(path, opts) {
|
||||
}
|
||||
}
|
||||
timestamps.push(dur);
|
||||
const height = heightScale(peak);
|
||||
const height = linear ? (baseHeight-padding)*(opts.heightScale ?? 1)*2 : heightScale(peak);
|
||||
const yscale = d3.scaleLinear().domain([0, peak]).range([height, 0]);
|
||||
// generic polygon merger
|
||||
const base0 = yscale(0);
|
||||
const sum = {x:[], y0:[], y1:[], fillColor:"#2B1B72"};
|
||||
const sum = {x:[], y0:[], y1:[], fillColor:config.fillColor};
|
||||
for (let i=0; i<timestamps.length-1; i++) {
|
||||
const yv = yscale(valueMap.get(timestamps[i]));
|
||||
sum.x.push(timestamps[i], timestamps[i+1]); sum.y1.push(yv, yv); sum.y0.push(base0, base0);
|
||||
@@ -492,9 +495,10 @@ async function renderProfiler(path, opts) {
|
||||
return bufShapes;
|
||||
};
|
||||
if (timestamps.length > 0) data.first = data.first == null ? timestamps[0] : Math.min(data.first, timestamps[0]);
|
||||
data.tracks.set(k, { shapes:[sum], eventType, visible, offsetY, pcolor:"#c9a8ff", height, peak, scaleFactor:maxheight*4/height,
|
||||
get views() { return [[sum], buildBufShapes()]; }, valueMap, rowBorderColor });
|
||||
data.tracks.set(k, { shapes:[sum], eventType, linear, visible, offsetY, pcolor:config.pcolor, height, peak, scaleFactor:maxheight*4/height,
|
||||
get views() { return [[sum], linear ? null : buildBufShapes()]; }, valueMap, rowBorderColor });
|
||||
div.style("height", height+padding+"px").style("cursor", "pointer").on("click", (e) => {
|
||||
if (linear) return;
|
||||
const newFocus = e.currentTarget.id === focusedDevice ? null : e.currentTarget.id;
|
||||
let offset = 0;
|
||||
for (const [tid, track] of data.tracks) {
|
||||
@@ -502,7 +506,7 @@ async function renderProfiler(path, opts) {
|
||||
if (tid === newFocus) { track.shapes = track.views[1]; offset += rescaleTrack(track, tid, track.scaleFactor); }
|
||||
else if (tid === focusedDevice) { track.shapes = track.views[0]; offset += rescaleTrack(track, tid, 1/track.scaleFactor); }
|
||||
}
|
||||
data.axes.y = newFocus != null ? { domain:[0, (t=data.tracks.get(newFocus)).peak], range:[t.offsetY+t.height, t.offsetY], fmt:"B" } : null;
|
||||
data.axes.y = newFocus != null ? { domain:[0, (t=data.tracks.get(newFocus)).peak], range:[t.offsetY+t.height, t.offsetY], fmt:config.unit } : null;
|
||||
toggleCls(document.getElementById(focusedDevice), document.getElementById(newFocus), "expanded");
|
||||
focusedDevice = newFocus;
|
||||
return resize();
|
||||
@@ -541,26 +545,27 @@ async function renderProfiler(path, opts) {
|
||||
const visibleYStart = profilerEl.scrollTop-canvasTop + rect(profilerEl).top, visibleYEnd = visibleYStart+profilerEl.clientHeight;
|
||||
ctx.textBaseline = "middle";
|
||||
// draw shapes
|
||||
for (const [k, { shapes, eventType, visible, offsetY, valueMap, pcolor, scolor, rowBorderColor }] of data.tracks) {
|
||||
for (const [k, { shapes, eventType, linear, visible, offsetY, valueMap, pcolor, scolor, rowBorderColor }] of data.tracks) {
|
||||
visible.length = 0;
|
||||
const trackHeight = rect(document.getElementById(k)).height;
|
||||
if (offsetY+trackHeight < visibleYStart || offsetY > visibleYEnd) continue;
|
||||
const addBorder = scolor != null ? (w) => { if (w > 10) { ctx.strokeStyle = scolor; ctx.stroke(); } } : null;
|
||||
const config = GraphConfig[linear];
|
||||
for (const e of shapes) {
|
||||
if (eventType === EventTypes.BUF) { // generic polygon
|
||||
if (e.x[0]>et || e.x.at(-1)<st) continue;
|
||||
ctx.beginPath();
|
||||
const x = e.x.map(xscale);
|
||||
ctx.moveTo(x[0], offsetY+e.y0[0]);
|
||||
ctx.moveTo(x[0], offsetY+e.y1[0]);
|
||||
for (let i=1; i<x.length; i++) {
|
||||
ctx.lineTo(x[i], offsetY+e.y0[i]);
|
||||
ctx.lineTo(x[i], offsetY+e.y1[i]);
|
||||
let arg = e.arg;
|
||||
if (arg == null && valueMap != null) arg = {tooltipText: `Total: ${formatUnit(valueMap.get(e.x[i-1]), 'B')}`}
|
||||
if (arg == null && valueMap != null) arg = {tooltipText: formatUnit(valueMap.get(e.x[i-1]), config.unit)}
|
||||
visible.push({ x0:x[i-1], x1:x[i], y0:offsetY+e.y1[i-1], y1:offsetY+e.y0[i], arg });
|
||||
}
|
||||
for (let i=x.length-1; i>=0; i--) ctx.lineTo(x[i], offsetY+e.y1[i]);
|
||||
ctx.closePath();
|
||||
ctx.fillStyle = e.fillColor; ctx.fill();
|
||||
if (linear) { ctx.strokeStyle = e.fillColor; ctx.lineWidth = 2; ctx.stroke(); ctx.lineWidth = 1; }
|
||||
// walk the path back and fill the complete shape
|
||||
else { for (let i=x.length-1; i>=0; i--) ctx.lineTo(x[i], offsetY+e.y0[i]); ctx.closePath(); ctx.fillStyle = e.fillColor; ctx.fill(); }
|
||||
} else { // contiguous rect
|
||||
if (e.x>et || e.x+e.width<st) continue;
|
||||
const x = xscale(e.x);
|
||||
|
||||
@@ -239,13 +239,16 @@ def encode_mem_free(key:int, ts:int, execs:list[ProfilePointEvent], scache:dict)
|
||||
ei_encoding.append((e.key, enum_str(e.arg["name"], scache), num, mode))
|
||||
return struct.pack("<BIII", 0, ts, key, len(ei_encoding))+b"".join(struct.pack("<IIIB", *t) for t in ei_encoding)
|
||||
|
||||
def mem_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int, end_ts:int, peaks:list[int], dtype_size:dict[str, int],
|
||||
scache:dict[str, int]) -> bytes|None:
|
||||
def graph_layout(k:str, dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int, end_ts:int, peaks:list[int], dtype_size:dict[str, int],
|
||||
scache:dict[str, int]) -> tuple[str, bytes|None]:
|
||||
if k.startswith("LINE:"):
|
||||
xy = [(rel_ts(e.ts, start_ts), e.key) for st,_,_,e in dev_events if isinstance(e, ProfilePointEvent)]
|
||||
peaks.append(peak:=max([y for _,y in xy]))
|
||||
return k.replace("LINE:", ""), struct.pack("<BIBQ", 1, len(xy), 1, peak)+b"".join(struct.pack("<IQ", x, y) for x,y in xy)
|
||||
peak, mem = 0, 0
|
||||
temp:dict[int, int] = {}
|
||||
events:list[bytes] = []
|
||||
buf_ei:dict[int, list[ProfilePointEvent]] = {}
|
||||
|
||||
for st,_,_,e in dev_events:
|
||||
if not isinstance(e, ProfilePointEvent): continue
|
||||
if e.name == "alloc":
|
||||
@@ -262,7 +265,7 @@ def mem_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int,
|
||||
mem -= temp.pop(e.key)
|
||||
for t in temp: events.append(encode_mem_free(t, rel_ts(end_ts, start_ts), buf_ei.pop(t, []), scache))
|
||||
peaks.append(peak)
|
||||
return struct.pack("<BIQ", 1, len(events), peak)+b"".join(events) if events else None
|
||||
return f"{k} Memory", struct.pack("<BIBQ", 1, len(events), 0, peak)+b"".join(events) if events else None
|
||||
|
||||
# by default, VIZ does not start when there is an error
|
||||
# use this to instead display the traceback to the user
|
||||
@@ -272,7 +275,7 @@ def soft_err(fn:Callable):
|
||||
except Exception: fn({"src":traceback.format_exc()})
|
||||
|
||||
def row_tuple(row:str) -> tuple[tuple[int, int], ...]:
|
||||
return tuple((ord(ss[0][0]), int(ss[1])) if len(ss:=x.split(":"))>1 else (999,999) for x in row.split())
|
||||
return ((0, 0),) if "Clock" in row else tuple((ord(ss[0][0]), int(ss[1])) if len(ss:=x.split(":"))>1 else (999,999) for x in row.split())
|
||||
|
||||
# *** Performance counters
|
||||
|
||||
@@ -336,9 +339,11 @@ def load_amd_counters(ctxs:list[dict], profile:list[ProfileEvent]) -> None:
|
||||
|
||||
def sqtt_timeline(data:bytes, lib:bytes, target:str) -> list[ProfileEvent]:
|
||||
from tinygrad.renderer.amd.sqtt import map_insts, InstructionInfo, PacketType, INST, InstOp, VALUINST, IMMEDIATE, IMMEDIATE_MASK, VMEMEXEC, ALUEXEC
|
||||
from tinygrad.renderer.amd.sqtt import INST_RDNA4, InstOpRDNA4
|
||||
from tinygrad.renderer.amd.sqtt import INST_RDNA4, InstOpRDNA4, TS_DELTA_OR_MARK, TS_DELTA_OR_MARK_RDNA4
|
||||
ret:list[ProfileEvent] = []
|
||||
row_ends:dict[str, Decimal] = {}
|
||||
NS_PER_TICK = 10 # 100MHz
|
||||
prev_pair:tuple[int, int]|None = None # (shader, realtime)
|
||||
def add(name:str, p:PacketType, width=1, op:str|None=None, wave:int|None=None, info:InstructionInfo|None=None) -> None:
|
||||
row = f"WAVE:{wave}" if (wave:=getattr(p, "wave", wave)) is not None else f"{p.__class__.__name__}:0 {name}"
|
||||
ret.append(e:=ProfileRangeEvent(row, TracingKey(op or name, ret=f"PC:{info.pc}" if info else None), Decimal(p._time), Decimal(p._time+width)))
|
||||
@@ -346,6 +351,14 @@ def sqtt_timeline(data:bytes, lib:bytes, target:str) -> list[ProfileEvent]:
|
||||
row_ends[row] = unwrap(e.en)
|
||||
for p, info in map_insts(data, lib, target):
|
||||
if len(ret) > getenv("MAX_SQTT_PKTS", 50_000): break
|
||||
if isinstance(p, (TS_DELTA_OR_MARK, TS_DELTA_OR_MARK_RDNA4)) and p.is_marker:
|
||||
pair = (p._time, p.delta)
|
||||
if prev_pair is None: prev_pair = pair
|
||||
elif ret:
|
||||
(s0, r0), (s1, r1) = prev_pair, pair
|
||||
freq_hz = (s1 - s0) * 1_000_000_000 // ((r1 - r0) * NS_PER_TICK)
|
||||
ret.append(ProfilePointEvent("LINE:Shader Clock", "freq_hz", freq_hz, ts=Decimal(p._time)))
|
||||
prev_pair = pair
|
||||
if isinstance(p, (INST, INST_RDNA4)):
|
||||
name = p.op.name if isinstance(p.op, (InstOp, InstOpRDNA4)) else f"0x{p.op:02x}"
|
||||
add(name, p, width=10 if "BARRIER" in name else 1, info=info)
|
||||
@@ -431,7 +444,7 @@ def get_profile(profile:list[ProfileEvent], sort_fn:Callable[[str], Any]=device_
|
||||
for k,v in dev_events.items():
|
||||
v.sort(key=lambda e:e[0])
|
||||
layout[k] = timeline_layout(v, start_ts, scache)
|
||||
layout[f"{k} Memory"] = mem_layout(v, start_ts, unwrap(end_ts), peaks, dtype_size, scache)
|
||||
layout.update([graph_layout(k, v, start_ts, unwrap(end_ts), peaks, dtype_size, scache)])
|
||||
sorted_layout = sorted([k for k,v in layout.items() if v is not None], key=sort_fn)
|
||||
ret = [b"".join([struct.pack("<B", len(k)), k.encode(), unwrap(layout[k])]) for k in sorted_layout]
|
||||
index = json.dumps({"strings":list(scache), "dtypeSize":dtype_size, "markers":[{"ts":rel_ts(e.ts, start_ts), **e.arg} for e in markers],
|
||||
|
||||
Reference in New Issue
Block a user