diff --git a/extra/sqtt/test_timing.py b/extra/sqtt/test_timing.py index f0f51516c3..898f4de91d 100644 --- a/extra/sqtt/test_timing.py +++ b/extra/sqtt/test_timing.py @@ -14,7 +14,7 @@ from tinygrad.uop.ops import UOp, Ops, KernelInfo, AddrSpace from tinygrad.engine.realize import CompiledRunner from tinygrad.device import Device, ProfileDeviceEvent -from extra.sqtt.roc import decode, InstExec, PrgExec +from extra.sqtt.roc import decode, WaveExec dev = Device[os.environ["DEV"]] @@ -36,7 +36,7 @@ def asm_kernel(instrs:list[str], l:int=1, g:int=1) -> Tensor: def save_sqtt(): # clear the old traces dev.profile_events.clear() - sqtt:dict[PrgExec, list[InstExec]] = {} + sqtt:dict[str, list[WaveExec]] = {} yield sqtt # decode sqtt if os.environ["DEV"] == "AMD": diff --git a/tinygrad/viz/serve.py b/tinygrad/viz/serve.py index 8a4207f3fa..d30496f349 100755 --- a/tinygrad/viz/serve.py +++ b/tinygrad/viz/serve.py @@ -215,9 +215,9 @@ def load_sqtt(profile:list[ProfileEvent]) -> None: if not rctx.inst_execs: return err("EMPTY SQTT OUTPUT", f"{len(sqtt_events)} SQTT events recorded, none got decoded") steps:list[dict] = [] for name,waves in rctx.inst_execs.items(): - if (r:=ref_map.get(name)): name = ctxs[r]["name"] - steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters", - "data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}}) + prg = trace.keys[r].ret if (r:=ref_map.get(name)) else None + steps.append({"name":prg.name if prg is not None else name, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters", + "depth":0, "data":{"src":prg.src if prg is not None else name, "lang":"cpp"}}) # Idle: The total time gap between the completion of previous instruction and the beginning of the current instruction. # The idle time can be caused by: