mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 22:48:25 -05:00
amd: sqtt on gfx12 (#12564)
* amd: sqtt on gfx12 * cleaner * thi * and this * ops * ugh * back * rm this * rm
This commit is contained in:
@@ -156,6 +156,9 @@ class RGP:
|
||||
sqtt_events = [x for x in profile if isinstance(x, ProfileSQTTEvent) and x.device == device_event.device]
|
||||
if len(sqtt_events) == 0: raise RuntimeError(f"Device {device_event.device} doesn't contain SQTT data")
|
||||
device_props = sqtt_events[0].props
|
||||
gfx_ver = device_props['gfx_target_version'] // 10000
|
||||
gfx_iplvl = getattr(sqtt, f"SQTT_GFXIP_LEVEL_GFXIP_{device_props['gfx_target_version']//10000}_{(device_props['gfx_target_version']//100)%100}",
|
||||
getattr(sqtt, f"SQTT_GFXIP_LEVEL_GFXIP_{device_props['gfx_target_version']//10000}", None))
|
||||
sqtt_itrace_enabled = any([event.itrace for event in sqtt_events])
|
||||
sqtt_itrace_masked = not all_same([event.itrace for event in sqtt_events])
|
||||
sqtt_itrace_se_mask = functools.reduce(lambda a,b: a|b, [int(event.itrace) << event.se for event in sqtt_events], 0) if sqtt_itrace_masked else 0
|
||||
@@ -193,7 +196,7 @@ class RGP:
|
||||
flags=0,
|
||||
trace_shader_core_clock=0x93f05080,
|
||||
trace_memory_clock=0x4a723a40,
|
||||
device_id={110000: 0x744c, 110003: 0x7480}[device_props['gfx_target_version']],
|
||||
device_id={110000: 0x744c, 110003: 0x7480, 120001: 0x7550}[device_props['gfx_target_version']],
|
||||
device_revision_id=0xc8,
|
||||
vgprs_per_simd=1536,
|
||||
sgprs_per_simd=128*16,
|
||||
@@ -207,7 +210,7 @@ class RGP:
|
||||
sgpr_alloc_granularity=128,
|
||||
hardware_contexts=8,
|
||||
gpu_type=sqtt.SQTT_GPU_TYPE_DISCRETE,
|
||||
gfxip_level=sqtt.SQTT_GFXIP_LEVEL_GFXIP_11_0,
|
||||
gfxip_level=gfx_iplvl,
|
||||
gpu_index=0,
|
||||
gds_size=0,
|
||||
gds_per_shader_engine=0,
|
||||
@@ -258,7 +261,7 @@ class RGP:
|
||||
major_version=0, minor_version=2,
|
||||
),
|
||||
shader_engine_index=sqtt_event.se,
|
||||
sqtt_version=sqtt.SQTT_VERSION_3_2,
|
||||
sqtt_version={11: sqtt.SQTT_VERSION_3_2, 12: sqtt.SQTT_VERSION_3_3}.get(gfx_ver),
|
||||
_0=sqtt.union_sqtt_file_chunk_sqtt_desc_0(
|
||||
v1=sqtt.struct_sqtt_file_chunk_sqtt_desc_0_v1(
|
||||
instrumentation_spec_version=1,
|
||||
|
||||
@@ -20,14 +20,15 @@ class InstInfo:
|
||||
class _ROCParseCtx:
|
||||
def __init__(self, sqtt_evs:list[ProfileSQTTEvent], prog_evs:list[ProfileProgramEvent]):
|
||||
self.sqtt_evs, self.prog_evs = iter(sqtt_evs), prog_evs
|
||||
self.wave_events = {}
|
||||
self.wave_events, self.disasms, self.addr2prg = {}, {}, {}
|
||||
|
||||
for prog in prog_evs:
|
||||
for addr, info in comgr_get_address_table(prog.lib).items():
|
||||
self.disasms[prog.base + addr] = info
|
||||
self.addr2prg[prog.base + addr] = prog
|
||||
|
||||
def next_sqtt(self): return next(self.sqtt_evs, None)
|
||||
def find_program(self, idx): return self.prog_evs[idx]
|
||||
def get_instr_info(self, idx, exec_addr): return self.disasm_program(idx)[exec_addr - self.find_program(idx).base]
|
||||
|
||||
@functools.lru_cache(None)
|
||||
def disasm_program(self, idx): return comgr_get_address_table(self.find_program(idx).lib)
|
||||
def find_program(self, addr): return self.addr2prg[addr]
|
||||
|
||||
def on_occupancy_ev(self, ev):
|
||||
if DEBUG >= 4: print("OCC", ev.time, ev.cu, ev.simd, ev.wave_id, ev.start)
|
||||
@@ -39,10 +40,10 @@ class _ROCParseCtx:
|
||||
for j in range(ev.instructions_size):
|
||||
inst_ev = ev.instructions_array[j]
|
||||
inst_typ = rocprof.rocprofiler_thread_trace_decoder_inst_category_t__enumvalues[inst_ev.category]
|
||||
asm.setdefault(inst_ev.pc.address, InstInfo(typ=inst_typ, inst=self.get_instr_info(inst_ev.pc.code_object_id, inst_ev.pc.address)[0]))
|
||||
asm.setdefault(inst_ev.pc.address, InstInfo(typ=inst_typ, inst=self.disasms[inst_ev.pc.address][0]))
|
||||
asm[inst_ev.pc.address].on_ev(inst_ev)
|
||||
|
||||
self.wave_events[(self.find_program(ev.instructions_array[0].pc.code_object_id).name, ev.wave_id, ev.cu, ev.simd)] = asm
|
||||
self.wave_events[(self.find_program(ev.instructions_array[0].pc.address).name, ev.wave_id, ev.cu, ev.simd)] = asm
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
@@ -78,7 +79,7 @@ if __name__ == "__main__":
|
||||
|
||||
@rocprof.rocprof_trace_decoder_isa_callback_t
|
||||
def isa_cb(instr_ptr, mem_size_ptr, size_ptr, pc, data_ptr):
|
||||
instr, mem_size_ptr[0] = ROCParseCtx.get_instr_info(pc.code_object_id, pc.address)
|
||||
instr, mem_size_ptr[0] = ROCParseCtx.disasms[pc.address]
|
||||
|
||||
# this is the number of bytes to next instruction, set to 0 for end_pgm
|
||||
if instr == "s_endpgm": mem_size_ptr[0] = 0
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
# POINTER_SIZE is: 8
|
||||
# LONGDOUBLE_SIZE is: 16
|
||||
#
|
||||
import ctypes, tinygrad.helpers.fetch as tgfetch
|
||||
import ctypes
|
||||
from tinygrad.helpers import fetch
|
||||
|
||||
|
||||
class AsDictMixin:
|
||||
@@ -155,7 +156,7 @@ class FunctionFactoryStub:
|
||||
# You can either re-run clan2py with -l /path/to/library.so
|
||||
# Or manually fix this by comment the ctypes.CDLL loading
|
||||
_libraries = {}
|
||||
_libraries['FIXME_STUB'] = ctypes.CDLL(str(tgfetch('https://github.com/ROCm/rocprof-trace-decoder/raw/5420409ad0963b2d76450add067b9058493ccbd0/releases/linux_glibc_2_28_x86_64/librocprof-trace-decoder.so'))) # ctypes.CDLL('FIXME_STUB')
|
||||
_libraries['FIXME_STUB'] = ctypes.CDLL(str(fetch('https://github.com/ROCm/rocprof-trace-decoder/raw/5420409ad0963b2d76450add067b9058493ccbd0/releases/linux_glibc_2_28_x86_64/librocprof-trace-decoder.so'))) # ctypes.CDLL('FIXME_STUB')
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@ enum sqtt_version
|
||||
SQTT_VERSION_2_3 = 0x6, /* GFX9 */
|
||||
SQTT_VERSION_2_4 = 0x7, /* GFX10+ */
|
||||
SQTT_VERSION_3_2 = 0xb, /* GFX11+ */
|
||||
SQTT_VERSION_3_3 = 0xc, /* GFX12+ */
|
||||
};
|
||||
|
||||
enum sqtt_file_chunk_type
|
||||
@@ -144,6 +145,8 @@ enum sqtt_gfxip_level
|
||||
SQTT_GFXIP_LEVEL_GFXIP_10_1 = 0x7,
|
||||
SQTT_GFXIP_LEVEL_GFXIP_10_3 = 0x9,
|
||||
SQTT_GFXIP_LEVEL_GFXIP_11_0 = 0xc,
|
||||
SQTT_GFXIP_LEVEL_GFXIP_11_5 = 0xd,
|
||||
SQTT_GFXIP_LEVEL_GFXIP_12 = 0x10,
|
||||
};
|
||||
|
||||
enum sqtt_memory_type
|
||||
@@ -427,6 +430,8 @@ enum elf_gfxip_level
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x04e,
|
||||
};
|
||||
|
||||
struct sqtt_file_chunk_spm_db {
|
||||
|
||||
Reference in New Issue
Block a user