viz: SE : CU : SIMD : WAVE in sqtt timeline (#13404)

* wave id in device rows

* SE : CU : SIMD : WAVE

* automatic width

* better styling

* rm the blue

* sort
This commit is contained in:
qazal
2025-11-21 15:42:29 +08:00
committed by GitHub
parent dabb02767f
commit 0de1b24154
3 changed files with 11 additions and 13 deletions

View File

@@ -270,8 +270,6 @@
font-size: 10px;
}
#device-list > div {
min-height: 32px;
width: 134px;
overflow-x: auto;
overflow-y: hidden;
white-space: nowrap;

View File

@@ -158,7 +158,7 @@ const formatUnit = (d, unit="") => d3.format(".3~s")(d)+unit;
// Color palettes keyed by category name.
// TINY maps specific event names to a fixed color (with its own "DEFAULT" entry);
// every other entry is a list of colors to pick from.
// NOTE: BUFFER used to be listed twice with identical values; the duplicate key is removed here.
// SIMD and SE are both kept so existing lookups by either name keep resolving.
const colorScheme = {
  TINY: new Map([["Schedule","#1b5745"],["get_program","#1d2e62"],["compile","#63b0cd"],["DEFAULT","#354f52"]]),
  DEFAULT: ["#2b2e39", "#2c2f3a", "#31343f", "#323544", "#2d303a", "#2e313c", "#343746", "#353847", "#3c4050", "#404459", "#444862", "#4a4e65"],
  BUFFER: ["#342483", "#3E2E94", "#4938A4", "#5442B4", "#5E4CC2", "#674FCA"],
  SIMD: ["#3600f0"],
  SE: ["#2b2e39"],
  CATEGORICAL: ["#ff8080", "#F4A261", "#C8F9D4", "#8D99AE", "#F4A261", "#ffffa2", "#ffffc0", "#87CEEB"],
};
// Pick the entry of `lst` at position `i`, wrapping back to the start once `i` reaches the list length.
const cycleColors = (lst, i) => {
  const slot = i % lst.length;
  return lst[slot];
};
@@ -200,7 +200,7 @@ function focusShape(shape) {
// Numeric tags for the two kinds of profiler events: EXEC is 0, BUF is 1.
const EventTypes = {
  EXEC: 0,
  BUF: 1,
};
async function renderProfiler(path, unit) {
async function renderProfiler(path, unit, opts) {
displaySelection("#profiler");
metadata.replaceChildren(shapeMetadata.get(focusedShape) ?? "");
// layout once!
@@ -236,7 +236,7 @@ async function renderProfiler(path, unit) {
for (let i=0; i<layoutsLen; i++) {
const nameLen = view.getUint8(offset, true); offset += 1;
const k = textDecoder.decode(new Uint8Array(buf, offset, nameLen)); offset += nameLen;
const div = deviceList.append("div").attr("id", k).text(k).style("padding", padding+"px");
const div = deviceList.append("div").attr("id", k).text(k).style("padding", padding+"px").style("width", opts.width).style("min-height", opts.height);
const { y:baseY, height:baseHeight } = rect(div.node());
const colors = colorScheme[k.split(":")[0]] ?? colorScheme.DEFAULT;
const offsetY = baseY-canvasTop+padding/2;
@@ -705,7 +705,7 @@ async function main() {
if (url.pathname+url.search !== ckey) e.close();
else if (e.readyState === EventSource.OPEN) activeSrc = e;
}
if (ctx.name === "Profiler") return renderProfiler("/get_profile", "realtime");
if (ctx.name === "Profiler") return renderProfiler("/get_profile", "realtime", { width:"132px", height:"32px" });
if (workerUrl == null) await initWorker();
if (ckey in cache) {
ret = cache[ckey];
@@ -713,7 +713,7 @@ async function main() {
// ** Disassembly view
if (!ckey.startsWith("/rewrites")) {
if (!(ckey in cache)) cache[ckey] = ret = await fetchValue(ckey);
if (ret instanceof ArrayBuffer) return renderProfiler(ckey, "clk"); // cycles on the x axis
if (ret instanceof ArrayBuffer) return renderProfiler(ckey, "clk", { height:"16px" }); // cycles on the x axis
displaySelection("#custom");
metadata.innerHTML = "";
const root = d3.create("div").classed("raw-text", true).node();

View File

@@ -235,8 +235,8 @@ def load_sqtt(profile:list[ProfileEvent]) -> None:
# Stall: The total number of cycles the hardware pipe couldn't issue an instruction.
# Duration: Total latency in cycles, defined as "Stall time + Issue time" for gfx9 or "Stall time + Execute time" for gfx10+.
for w in waves:
units.add(row:=f"SIMD:{w.simd} CU:{w.cu} SE:{w.se}")
events.append(ProfileRangeEvent(row, wave_name:=f"wave {w.wave_id}", Decimal(w.begin_time), Decimal(w.end_time)))
units.add(row:=f"SE:{w.se} CU:{w.cu} SIMD:{w.simd} WAVE:{w.wave_id}")
events.append(ProfileRangeEvent(row, wave_name:="wave", Decimal(w.begin_time), Decimal(w.end_time)))
rows, prev_instr = [], w.begin_time
for i,e in enumerate(w.insts):
rows.append((e.inst, e.time, max(0, e.time-prev_instr), e.dur, e.stall, str(e.typ).split("_")[-1]))
@@ -246,10 +246,10 @@ def load_sqtt(profile:list[ProfileEvent]) -> None:
steps.append(create_step(wave_name, ("/counters", len(ctxs), len(steps)), depth=2,
data={"rows":rows, "cols":["Instruction", "Clk", "Idle", "Duration", "Stall", "Type"], "summary":summary}))
events = [ProfilePointEvent(unit, "start", unit, ts=Decimal(0)) for unit in units]+events
first["data"] = {"value":get_profile(events), "content_type":"application/octet-stream"}
first["data"] = {"value":get_profile(events, lambda k:tuple(int(x.split(":")[1]) for x in k.split())), "content_type":"application/octet-stream"}
ctxs.append({"name":"Counters", "steps":steps})
def get_profile(profile:list[ProfileEvent]) -> bytes|None:
def get_profile(profile:list[ProfileEvent], sort_fn:Callable[[str], Any]|None=None) -> bytes|None:
# start by getting the time diffs
for ev in profile:
if isinstance(ev,ProfileDeviceEvent): device_ts_diffs[ev.device] = (ev.comp_tdiff, ev.copy_tdiff if ev.copy_tdiff is not None else ev.comp_tdiff)
@@ -275,8 +275,8 @@ def get_profile(profile:list[ProfileEvent]) -> bytes|None:
scache:dict[str, int] = {}
peaks:list[int] = []
dtype_size:dict[str, int] = {}
for k,v in dev_events.items():
v.sort(key=lambda e:e[0])
for k in sorted(dev_events, key=sort_fn) if sort_fn else dev_events:
(v:=dev_events[k]).sort(key=lambda e:e[0])
layout[k] = timeline_layout(v, start_ts, scache)
layout[f"{k} Memory"] = mem_layout(v, start_ts, unwrap(end_ts), peaks, dtype_size, scache)
groups = sorted(layout.items(), key=lambda x: '' if len(ss:=x[0].split(" ")) == 1 else ss[1])