roc: only save instruction execs (#13254)

This commit is contained in:
qazal
2025-11-13 21:28:40 +08:00
committed by GitHub
parent f9586b38ba
commit 006dea4c3e

View File

@@ -28,18 +28,6 @@ def llvm_disasm(arch:str, lib:bytes) -> dict[int, tuple[str, int]]:
cur_off += instr_sz
return addr_table
@dataclasses.dataclass
class InstInfo:
  """Running totals for one instruction, accumulated one execution event at a time via `on_ev`."""
  typ:str=""    # instruction category label
  inst:str=""   # disassembly text of the instruction
  hit:int=0     # number of events folded in so far
  lat:int=0     # sum of `ev.duration` over those events
  stall:int=0   # sum of `ev.stall` over those events
  def __str__(self): return f"{self.inst:>20} type:{self.typ:>6} hits:{self.hit:>6} latency:{self.lat:>6} stall:{self.stall:>6}"
  def on_ev(self, ev):
    """Fold one event into the totals; `ev` must expose numeric `duration` and `stall` attributes."""
    self.hit, self.lat, self.stall = self.hit + 1, self.lat + ev.duration, self.stall + ev.stall
@dataclasses.dataclass(frozen=True)
class InstExec:
typ:str
@@ -48,14 +36,6 @@ class InstExec:
dur:int
time:int
@dataclasses.dataclass(frozen=True)
class PrgExec:
  """Immutable, hashable key naming one wave of one program: (name, wave, cu, simd)."""
  name:str
  wave:int
  cu:int
  simd:int
  # comma-joined form of the four fields, in declaration order
  def __str__(self): return f"{self.name},{self.wave},{self.cu},{self.simd}"
@dataclasses.dataclass(frozen=True)
class WaveExec:
wave_id:int
@@ -68,7 +48,6 @@ class WaveExec:
class _ROCParseCtx:
def __init__(self, dev_evs:dict[str, ProfileDeviceEvent], sqtt_evs:list[ProfileSQTTEvent], prog_evs:list[ProfileProgramEvent]):
self.dev_evs, self.sqtt_evs, self.prog_evs = dev_evs, iter(sqtt_evs), prog_evs
self.wave_events:dict[PrgExec, dict[int, InstInfo]] = {}
self.disasms:dict[tuple[str, int], tuple[str, int]] = {}
self.inst_execs:dict[str, list[WaveExec]] = {}
@@ -89,19 +68,15 @@ class _ROCParseCtx:
def on_wave_ev(self, ev):
if DEBUG >= 5: print("WAVE", ev.wave_id, self.active_se, ev.cu, ev.simd, ev.contexts, ev.begin_time, ev.end_time)
asm:dict[int, InstInfo] = {}
inst_execs:list[InstExec] = []
for j in range(ev.instructions_size):
inst_ev = ev.instructions_array[j]
inst_typ = rocprof.rocprofiler_thread_trace_decoder_inst_category_t__enumvalues[inst_ev.category]
inst_disasm = self.disasms[(unwrap(self.active_kern), unwrap(inst_ev.pc.address))][0]
asm.setdefault(inst_ev.pc.address, InstInfo(typ=inst_typ, inst=inst_disasm))
asm[inst_ev.pc.address].on_ev(inst_ev)
inst_execs.append(InstExec(inst_typ, inst_disasm, inst_ev.stall, inst_ev.duration, inst_ev.time))
if ev.instructions_size > 0:
self.wave_events[key:=PrgExec(unwrap(self.active_kern), ev.wave_id, ev.cu, ev.simd)] = asm
self.inst_execs.setdefault(key.name, []).append(WaveExec(ev.wave_id, ev.cu, ev.simd, ev.begin_time, ev.end_time, inst_execs))
self.inst_execs.setdefault(unwrap(self.active_kern), []).append(WaveExec(ev.wave_id, ev.cu, ev.simd, ev.begin_time, ev.end_time, inst_execs))
def decode(profile:list[ProfileEvent]) -> _ROCParseCtx:
dev_events:dict[str, ProfileDeviceEvent] = {}
@@ -159,7 +134,7 @@ if __name__ == "__main__":
with args.profile.open("rb") as f: profile = pickle.load(f)
rctx = decode(profile)
print('SQTT:', rctx.wave_events.keys())
print('SQTT:', rctx.inst_execs.keys())
for ev in profile:
if not isinstance(ev, ProfilePMCEvent): continue