mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 22:48:25 -05:00
@@ -35,7 +35,7 @@ class InstInfo:
|
||||
hit:int=0
|
||||
lat:int=0
|
||||
stall:int=0
|
||||
def __str__(self): return f"{self.inst:>20} hits:{self.typ:>6} hits:{self.hit:>6} latency:{self.lat:>6} stall:{self.stall:>6}"
|
||||
def __str__(self): return f"{self.inst:>20} type:{self.typ:>6} hits:{self.hit:>6} latency:{self.lat:>6} stall:{self.stall:>6}"
|
||||
|
||||
def on_ev(self, ev):
|
||||
self.hit, self.lat, self.stall = self.hit + 1, self.lat + ev.duration, self.stall + ev.stall
|
||||
@@ -61,6 +61,8 @@ class WaveExec:
|
||||
wave_id:int
|
||||
cu:int
|
||||
simd:int
|
||||
begin_time:int
|
||||
end_time:int
|
||||
insts:list[InstExec]
|
||||
|
||||
class _ROCParseCtx:
|
||||
@@ -99,7 +101,7 @@ class _ROCParseCtx:
|
||||
|
||||
if ev.instructions_size > 0:
|
||||
self.wave_events[key:=PrgExec(unwrap(self.active_kern), ev.wave_id, ev.cu, ev.simd)] = asm
|
||||
self.inst_execs.setdefault(key.name, []).append(WaveExec(ev.wave_id, ev.cu, ev.simd, inst_execs))
|
||||
self.inst_execs.setdefault(key.name, []).append(WaveExec(ev.wave_id, ev.cu, ev.simd, ev.begin_time, ev.end_time, inst_execs))
|
||||
|
||||
def decode(profile:list[ProfileEvent]) -> _ROCParseCtx:
|
||||
dev_events:dict[str, ProfileDeviceEvent] = {}
|
||||
|
||||
@@ -217,12 +217,23 @@ def load_sqtt(profile:list[ProfileEvent]) -> None:
|
||||
if (r:=ref_map.get(name)): name = ctxs[r]["name"]
|
||||
steps.append({"name":name, "depth":0, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
|
||||
"data":{"src":trace.keys[r].ret.src if r else name, "lang":"cpp"}})
|
||||
|
||||
# Idle: The total time gap between the completion of the previous instruction and the beginning of the current instruction.
|
||||
# The idle time can be caused by:
|
||||
# * Arbiter loss
|
||||
# * Source or destination register dependency
|
||||
# * Instruction cache miss
|
||||
# Stall: The total number of cycles the hardware pipe couldn't issue an instruction.
|
||||
# Duration: Total latency in cycles, defined as "Stall time + Issue time" for gfx9 or "Stall time + Execute time" for gfx10+.
|
||||
for w in waves:
|
||||
rows = [(e.inst, e.time, e.time-(w.insts[i-1].time if i else 0), e.dur, e.stall, str(e.typ).split("_")[-1]) for i,e in enumerate(w.insts)]
|
||||
summary = [{"label":"Total Cycles", "value":w.insts[-1].time-w.insts[0].time if w.insts else 0}, {"label":"CU", "value":w.cu},
|
||||
rows, prev_instr = [], w.begin_time
|
||||
for i,e in enumerate(w.insts):
|
||||
rows.append((e.inst, e.time, max(0, e.time-prev_instr), e.dur, e.stall, str(e.typ).split("_")[-1]))
|
||||
prev_instr = max(prev_instr, e.time + e.dur)
|
||||
summary = [{"label":"Total Cycles", "value":w.end_time-w.begin_time}, {"label":"CU", "value":w.cu},
|
||||
{"label":"SIMD", "value":w.simd}]
|
||||
steps.append({"name":f"Wave {w.wave_id}", "depth":1, "query":f"/render?ctx={len(ctxs)}&step={len(steps)}&fmt=counters",
|
||||
"data":{"rows":rows, "cols":["Instruction", "Clk", "Wait", "Duration", "Stall", "Type"], "summary":summary}})
|
||||
"data":{"rows":rows, "cols":["Instruction", "Clk", "Idle", "Duration", "Stall", "Type"], "summary":summary}})
|
||||
ctxs.append({"name":"Counters", "steps":steps})
|
||||
|
||||
def get_profile(profile:list[ProfileEvent]) -> bytes|None:
|
||||
|
||||
Reference in New Issue
Block a user