mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
sqtt: add occupancy events to the timeline (#13430)
This commit is contained in:
@@ -38,11 +38,18 @@ class InstExec:
|
||||
time:int
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class WaveExec:
|
||||
# Location record shared by the wave-related event classes: four integer fields plus two derived label strings.
# NOTE(review): SE / CU / SIMD presumably stand for shader engine / compute unit / SIMD unit — confirm.
class WaveSlot:
|
||||
wave_id:int
|
||||
cu:int
|
||||
simd:int
|
||||
se:int
|
||||
# Label without the wave id, formatted as "SE:<se> CU:<cu> SIMD:<simd>".
@property
|
||||
def simd_loc(self) -> str: return f"SE:{self.se} CU:{self.cu} SIMD:{self.simd}"
|
||||
# Full label: simd_loc followed by " WAVE:<wave_id>".
@property
|
||||
def wave_loc(self) -> str: return f"{self.simd_loc} WAVE:{self.wave_id}"
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class WaveExec(WaveSlot):
|
||||
begin_time:int
|
||||
end_time:int
|
||||
insts:bytearray
|
||||
@@ -53,11 +60,17 @@ class WaveExec:
|
||||
inst_typ = rocprof.enum_rocprofiler_thread_trace_decoder_inst_category_t.get(inst.category)
|
||||
yield InstExec(inst_typ, inst.pc.address, inst.stall, inst.duration, inst.time)
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
# One occupancy record: the WaveSlot location fields plus `time` and `start`.
# The timeline code in load_sqtt treats a truthy `start` as the beginning of a wave's range (it remembers `time`)
# and a falsy `start` as the end of that range.
class OccEvent(WaveSlot):
|
||||
time:int
|
||||
start:int
|
||||
|
||||
class _ROCParseCtx:
|
||||
def __init__(self, dev_evs:dict[str, ProfileDeviceEvent], sqtt_evs:list[ProfileSQTTEvent], prog_evs:list[ProfileProgramEvent]):
|
||||
self.dev_evs, self.sqtt_evs, self.prog_evs = dev_evs, iter(sqtt_evs), prog_evs
|
||||
self.disasms:dict[str, dict[int, tuple[str, int]]] = {}
|
||||
self.inst_execs:dict[str, list[WaveExec]] = {}
|
||||
self.occ_events:dict[str, list[OccEvent]] = {}
|
||||
|
||||
for prog in prog_evs:
|
||||
arch = "gfx%d%x%x" % ((trgt:=unwrap(dev_evs[prog.device].props)['gfx_target_version']) // 10000, (trgt // 100) % 100, trgt % 100)
|
||||
@@ -73,9 +86,12 @@ class _ROCParseCtx:
|
||||
|
||||
# Handles one occupancy record: converts it to an OccEvent and appends it to the list kept for the active kernel.
# NOTE(review): presumably invoked by the rocprof trace decoder once per occupancy record — confirm against the caller.
def on_occupancy_ev(self, ev:rocprof.rocprofiler_thread_trace_decoder_occupancy_t):
|
||||
# verbose per-record trace, only printed when DEBUG >= 5
if DEBUG >= 5: print(f"OCC {ev.time=} {self.active_se=} {ev.cu=} {ev.simd=} {ev.wave_id=} {ev.start=}")
|
||||
# the SE value is taken from self.active_se rather than from ev; events are grouped under self.active_kern
# (unwrap presumably rejects None for both — confirm)
self.occ_events.setdefault(unwrap(self.active_kern), []).append(OccEvent(ev.wave_id, ev.cu, ev.simd, unwrap(self.active_se), ev.time, ev.start))
|
||||
|
||||
def on_wave_ev(self, ev:rocprof.rocprofiler_thread_trace_decoder_wave_t):
|
||||
if DEBUG >= 5: print(f"WAVE {ev.wave_id=} {self.active_se=} {ev.cu=} {ev.simd=} {ev.contexts=} {ev.begin_time=} {ev.end_time=}")
|
||||
# Skip wave events without instruction timings, occupancy events give the start and duration.
|
||||
if ev.instructions_size == 0: return
|
||||
|
||||
insts_blob = bytearray(sz:=ev.instructions_size * ctypes.sizeof(rocprof.rocprofiler_thread_trace_decoder_inst_t))
|
||||
ctypes.memmove((ctypes.c_char * sz).from_buffer(insts_blob), ev.instructions_array, sz)
|
||||
|
||||
@@ -273,6 +273,7 @@
|
||||
overflow-y: hidden;
|
||||
white-space: nowrap;
|
||||
display: flex;
|
||||
min-height: 32px;
|
||||
}
|
||||
#device-list > div:hover {
|
||||
background-color: rgba(20, 23, 35, 0.3);
|
||||
|
||||
@@ -158,7 +158,7 @@ const formatUnit = (d, unit="") => d3.format(".3~s")(d)+unit;
|
||||
|
||||
const colorScheme = {TINY:new Map([["Schedule","#1b5745"],["get_program","#1d2e62"],["compile","#63b0cd"],["DEFAULT","#354f52"]]),
|
||||
DEFAULT:["#2b2e39", "#2c2f3a", "#31343f", "#323544", "#2d303a", "#2e313c", "#343746", "#353847", "#3c4050", "#404459", "#444862", "#4a4e65"],
|
||||
BUFFER:["#342483", "#3E2E94", "#4938A4", "#5442B4", "#5E4CC2", "#674FCA"], SE:["#2b2e39"],
|
||||
BUFFER:["#342483", "#3E2E94", "#4938A4", "#5442B4", "#5E4CC2", "#674FCA"], SE:new Map([["OCC", "#101725"], ["INST", "#0A2042"]]),
|
||||
CATEGORICAL:["#ff8080", "#F4A261", "#C8F9D4", "#8D99AE", "#F4A261", "#ffffa2", "#ffffc0", "#87CEEB"],}
|
||||
// Pick the entry at position i of lst, wrapping back to the start once i runs past the end.
const cycleColors = (lst, i) => {
  const wrapped = i % lst.length;
  return lst[wrapped];
};
|
||||
|
||||
@@ -206,7 +206,7 @@ async function renderProfiler(path, unit, opts) {
|
||||
// layout once!
|
||||
if (data != null && data.path === path) return updateProgress({ start:false });
|
||||
// support non realtime x axis units
|
||||
const formatTime = unit === "realtime" ? formatMicroseconds : (s) => `${s} ${unit}`;
|
||||
const formatTime = unit === "realtime" ? formatMicroseconds : (s) => formatUnit(s, " "+unit);
|
||||
const profiler = d3.select("#profiler").html("");
|
||||
const buf = cache[path] ?? await fetchValue(path);
|
||||
const view = new DataView(buf);
|
||||
@@ -236,33 +236,36 @@ async function renderProfiler(path, unit, opts) {
|
||||
for (let i=0; i<layoutsLen; i++) {
|
||||
const nameLen = view.getUint8(offset, true); offset += 1;
|
||||
const k = textDecoder.decode(new Uint8Array(buf, offset, nameLen)); offset += nameLen;
|
||||
const div = deviceList.append("div").attr("id", k).text(k).style("padding", padding+"px").style("width", opts.width).style("min-height", opts.height);
|
||||
const div = deviceList.append("div").attr("id", k).text(k).style("padding", padding+"px").style("width", opts.width);
|
||||
const { y:baseY, height:baseHeight } = rect(div.node());
|
||||
const colors = colorScheme[k.split(":")[0]] ?? colorScheme.DEFAULT;
|
||||
const offsetY = baseY-canvasTop+padding/2;
|
||||
const shapes = [], visible = [];
|
||||
const eventType = u8(), eventsLen = u32();
|
||||
if (eventType === EventTypes.EXEC) {
|
||||
const levelHeight = baseHeight-padding;
|
||||
const levelHeight = (baseHeight-padding)*(opts.heightScale ?? 1);
|
||||
const levels = [];
|
||||
data.tracks.set(k, { shapes, eventType, visible, offsetY, pcolor:"#9ea2ad" });
|
||||
let colorKey, ref;
|
||||
for (let j=0; j<eventsLen; j++) {
|
||||
const e = {name:strings[u32()], ref:optional(u32()), key:optional(u32()), st:u32(), dur:f32(), info:strings[u32()] || null};
|
||||
// find a free level to put the event
|
||||
let depth = levels.findIndex(levelEt => e.st >= levelEt);
|
||||
const et = e.st+Math.trunc(e.dur);
|
||||
if (depth === -1) {
|
||||
depth = levels.length;
|
||||
levels.push(et);
|
||||
} else levels[depth] = et;
|
||||
let depth = 0;
|
||||
if (opts.levelKey != null) { depth = opts.levelKey(e); levels[depth] = 0; }
|
||||
else {
|
||||
depth = levels.findIndex(levelEt => e.st >= levelEt);
|
||||
const et = e.st+Math.trunc(e.dur);
|
||||
if (depth === -1) {
|
||||
depth = levels.length;
|
||||
levels.push(et);
|
||||
} else levels[depth] = et;
|
||||
}
|
||||
if (depth === 0) colorKey = e.name.split(" ")[0];
|
||||
if (!colorMap.has(colorKey)) {
|
||||
const color = colors instanceof Map ? (colors.get(colorKey) || colors.get("DEFAULT")) : cycleColors(colors, colorMap.size);
|
||||
colorMap.set(colorKey, d3.rgb(color));
|
||||
}
|
||||
const base = colorMap.get(colorKey), s = Math.min(Math.pow(1/0.7, depth), 240 / Math.max(base.r, base.g, base.b));
|
||||
const fillColor = d3.rgb(base.r*s, base.g*s, base.b*s).toString();
|
||||
const fillColor = colorMap.get(colorKey).brighter(0.3*depth).toString();
|
||||
const label = parseColors(e.name).map(({ color, st }) => ({ color, st, width:ctx.measureText(st).width }));
|
||||
let shapeRef = e.ref;
|
||||
if (shapeRef != null) { ref = {ctx:e.ref, step:0}; shapeRef = ref; }
|
||||
@@ -286,7 +289,7 @@ async function renderProfiler(path, unit, opts) {
|
||||
ctx:shapeRef?.ctx, step:shapeRef?.step };
|
||||
if (e.key != null) shapeMap.set(e.key, arg);
|
||||
// offset y by depth
|
||||
shapes.push({x:e.st, y:levelHeight*depth, width:e.dur, height:levelHeight, arg, label, fillColor });
|
||||
shapes.push({x:e.st, y:levelHeight*depth, width:e.dur, height:levelHeight, arg, label:opts.hideLabels ? null : label, fillColor });
|
||||
}
|
||||
div.style("height", levelHeight*levels.length+padding+"px").style("pointerEvents", "none");
|
||||
} else {
|
||||
@@ -472,7 +475,7 @@ async function renderProfiler(path, unit, opts) {
|
||||
drawLine(ctx, [x, x], [0, canvas.clientHeight], { color:m.color });
|
||||
ctx.fillText(m.name, x+2, 1);
|
||||
}
|
||||
for (const [p, color] of paths) { ctx.lineWidth = 1.4; ctx.strokeStyle = color; ctx.stroke(p); }
|
||||
for (const [p, color] of paths) { ctx.strokeStyle = color; ctx.stroke(p); }
|
||||
}
|
||||
|
||||
function resize() {
|
||||
@@ -707,7 +710,7 @@ async function main() {
|
||||
if (url.pathname+url.search !== ckey) e.close();
|
||||
else if (e.readyState === EventSource.OPEN) activeSrc = e;
|
||||
}
|
||||
if (ctx.name === "Profiler") return renderProfiler("/get_profile", "realtime", { width:"132px", height:"32px" });
|
||||
if (ctx.name === "Profiler") return renderProfiler("/get_profile", "realtime", { width:"132px" });
|
||||
if (workerUrl == null) await initWorker();
|
||||
if (ckey in cache) {
|
||||
ret = cache[ckey];
|
||||
@@ -715,7 +718,11 @@ async function main() {
|
||||
// ** Disassembly view
|
||||
if (!ckey.startsWith("/rewrites")) {
|
||||
if (!(ckey in cache)) cache[ckey] = ret = await fetchValue(ckey);
|
||||
if (ret instanceof ArrayBuffer) return renderProfiler(ckey, "clk", { height:"16px" }); // cycles on the x axis
|
||||
// cycles on the x axis
|
||||
if (ret instanceof ArrayBuffer) {
|
||||
opts = {heightScale:0.5, hideLabels:true, levelKey:(e) => parseInt(e.name.split(" ")[1].split(":")[1])};
|
||||
return renderProfiler(ckey, "clk", opts);
|
||||
}
|
||||
displaySelection("#custom");
|
||||
metadata.innerHTML = "";
|
||||
const root = d3.create("div").classed("raw-text", true).node();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
import multiprocessing, pickle, difflib, os, threading, json, time, sys, webbrowser, socket, argparse, socketserver, functools, codecs, io, struct
|
||||
import ctypes, pathlib, traceback
|
||||
import ctypes, pathlib, traceback, itertools
|
||||
from contextlib import redirect_stdout, redirect_stderr
|
||||
from decimal import Decimal
|
||||
from http.server import BaseHTTPRequestHandler
|
||||
@@ -225,19 +225,28 @@ def load_sqtt(profile:list[ProfileEvent]) -> None:
|
||||
if getenv("SQTT_PARSE"):
|
||||
from extra.sqtt.attempt_sqtt_parse import parse_sqtt_print_packets
|
||||
for e in sqtt_events: parse_sqtt_print_packets(e.blob)
|
||||
if not rctx.inst_execs: return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded")
|
||||
if not any([rctx.inst_execs, rctx.occ_events]): return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded")
|
||||
steps:list[dict] = []
|
||||
for name,disasm in rctx.disasms.items():
|
||||
units:dict[str, int] = {}
|
||||
events:list[ProfileEvent] = []
|
||||
# wave instruction events
|
||||
wave_insts:dict[str, dict] = {}
|
||||
inst_units:dict[str, itertools.count] = {}
|
||||
for w in rctx.inst_execs.get(name, []):
|
||||
if (row:=f"SE:{w.se} CU:{w.cu} SIMD:{w.simd} WAVE:{w.wave_id}") not in units: units[row] = 0
|
||||
units[row] += 1
|
||||
events.append(ProfileRangeEvent(row, f"N:{units[row]}", Decimal(w.begin_time), Decimal(w.end_time)))
|
||||
wave_insts[f"{row} N:{units[row]}"] = {"wave":w, "disasm":disasm, "run_number":units[row]}
|
||||
# gather and sort all wave execs of this kernel
|
||||
if (u:=w.wave_loc) not in inst_units: inst_units[u] = itertools.count(0)
|
||||
n = next(inst_units[u])
|
||||
events.append(ProfileRangeEvent(w.simd_loc, f"INST WAVE:{w.wave_id} N:{n}", Decimal(w.begin_time), Decimal(w.end_time)))
|
||||
wave_insts[f"{u} N:{n}"] = {"wave":w, "disasm":disasm, "run_number":n}
|
||||
# occupancy events
|
||||
units:dict[str, itertools.count] = {}
|
||||
wave_start:dict[str, int] = {}
|
||||
for occ in rctx.occ_events[name]:
|
||||
if (u:=occ.wave_loc) not in units: units[u] = itertools.count(0)
|
||||
if u in inst_units: continue
|
||||
if occ.start: wave_start[u] = occ.time
|
||||
else: events.append(ProfileRangeEvent(occ.simd_loc, f"OCC WAVE:{occ.wave_id} N:{next(units[u])}", Decimal(wave_start.pop(u)),Decimal(occ.time)))
|
||||
if not events: continue
|
||||
# gather and sort all sqtt events for this kernel
|
||||
events = [ProfilePointEvent(unit, "start", unit, ts=Decimal(0)) for unit in units]+events
|
||||
kernel = trace.keys[r].ret if (r:=ref_map.get(name)) else None
|
||||
steps.append(create_step(kernel.name if kernel is not None else name, ("/counters", len(ctxs), len(steps)),
|
||||
@@ -275,7 +284,7 @@ def get_profile(profile:list[ProfileEvent], sort_fn:Callable[[str], Any]|None=No
|
||||
(v:=dev_events[k]).sort(key=lambda e:e[0])
|
||||
layout[k] = timeline_layout(v, start_ts, scache)
|
||||
layout[f"{k} Memory"] = mem_layout(v, start_ts, unwrap(end_ts), peaks, dtype_size, scache)
|
||||
groups = sorted(layout.items(), key=lambda x: '' if len(ss:=x[0].split(" ")) == 1 else ss[1])
|
||||
groups = layout.items() if sort_fn is not None else sorted(layout.items(), key=lambda x: '' if len(ss:=x[0].split(" ")) == 1 else ss[1])
|
||||
ret = [b"".join([struct.pack("<B", len(k)), k.encode(), v]) for k,v in groups if v is not None]
|
||||
index = json.dumps({"strings":list(scache), "dtypeSize":dtype_size, "markers":[{"ts":int(e.ts-start_ts), **e.arg} for e in markers]}).encode()
|
||||
return struct.pack("<IQII", unwrap(end_ts)-start_ts, max(peaks,default=0), len(index), len(ret))+index+b"".join(ret)
|
||||
|
||||
Reference in New Issue
Block a user