sqtt: add cycle count to rdna3 enums (#15473)

* update rdna3 sqtt enums to include cycle_count

* dispatch_to_exec
This commit is contained in:
qazal
2026-03-25 16:19:54 +02:00
committed by GitHub
parent 142bf11926
commit 60bd546593
3 changed files with 56 additions and 56 deletions

View File

@@ -131,25 +131,25 @@ def _init_sqtt_encoder():
SOPPOp3.S_CBRANCH_EXECZ.value, SOPPOp3.S_CBRANCH_EXECNZ.value}
# VALU sub-classification patterns
_VALU_TRANS_RE = re.compile(r'V_(EXP|LOG|RCP|RSQ|SQRT|SIN|COS|CEIL|FLOOR|TRUNC|RNDNE|FRACT|FREXP)_')
_VALU_64_SHIFT_RE = re.compile(r'V_(LSHLREV|LSHRREV|ASHRREV)_(B|I)64')
_VALU_MAD64_RE = re.compile(r'V_MAD_(U|I)64')
_VALU_64_RE = re.compile(r'V_\w+_F64')
_VALUT_4_RE = re.compile(r'V_(EXP|LOG|RCP|RSQ|SQRT|SIN|COS|CEIL|FLOOR|TRUNC|RNDNE|FRACT|FREXP)_')
_VALUB_2_RE = re.compile(r'V_(LSHLREV|LSHRREV|ASHRREV)_(B|I)64')
_VALUB_4_RE = re.compile(r'V_MAD_(U|I)64')
_VALUB_16_RE = re.compile(r'V_\w+_F64')
def _valu_op(op_name: str) -> InstOp|None:
if 'CMPX' in op_name: return InstOp.VALU_CMPX
if _VALU_64_SHIFT_RE.search(op_name): return InstOp.VALU_64_SHIFT
if _VALU_MAD64_RE.search(op_name): return InstOp.VALU_MAD64
if _VALU_64_RE.search(op_name): return InstOp.VALU_64
if _VALU_TRANS_RE.search(op_name): return InstOp.VALU_TRANS
if 'CMPX' in op_name: return InstOp.VALU1_WR_EXEC
if _VALUB_2_RE.search(op_name): return InstOp.VALUB_2
if _VALUB_4_RE.search(op_name): return InstOp.VALUB_4
if _VALUB_16_RE.search(op_name): return InstOp.VALUB_16
if _VALUT_4_RE.search(op_name): return InstOp.VALUT_4
return None
def _mem_op(t, op_name: str) -> InstOp:
is_store = "STORE" in op_name
if issubclass(t, _DS): return InstOp.LDS_STORE if is_store else InstOp.LDS_LOAD
if issubclass(t, _GLOBAL): return InstOp.GLOBAL_STORE if is_store else InstOp.GLOBAL_LOAD
if issubclass(t, _FLAT): return InstOp.FLAT_STORE if is_store else InstOp.FLAT_LOAD
if issubclass(t, _SCRATCH): return InstOp.FLAT_STORE if is_store else InstOp.FLAT_LOAD
if issubclass(t, _DS): return InstOp.LDS_WR_2 if is_store else InstOp.LDS_RD
if issubclass(t, _GLOBAL): return InstOp.SGMEM_WR_2 if is_store else InstOp.SGMEM_RD_1
if issubclass(t, _FLAT): return InstOp.FLAT_WR_3 if is_store else InstOp.FLAT_RD_2
if issubclass(t, _SCRATCH): return InstOp.FLAT_WR_3 if is_store else InstOp.FLAT_RD_2
return InstOp.SALU
nibbles: list[int] = []
@@ -174,7 +174,7 @@ def _init_sqtt_encoder():
op = _valu_op(op_name)
if op is None: _emit_nibbles(nibbles, VALUINST, delta=1, wave=w)
else: _emit_nibbles(nibbles, INST, delta=1, wave=w, op=op)
elif issubclass(inst_type, _SMEM): _emit_nibbles(nibbles, INST, delta=1, wave=w, op=InstOp.SMEM)
elif issubclass(inst_type, _SMEM): _emit_nibbles(nibbles, INST, delta=1, wave=w, op=InstOp.SMEM_RD)
else: _emit_nibbles(nibbles, INST, delta=1, wave=w, op=_mem_op(inst_type, op_name))
def finish(wave_id: int):

View File

@@ -44,62 +44,62 @@ class InstOp(Enum):
OTHER_ range follows same pattern but values overlap differently.
"""
SALU = 0x0
SMEM = 0x1
SMEM_RD = 0x1
JUMP = 0x3 # branch taken
JUMP_NO = 0x4 # branch not taken
CALL = 0x5 # s_call_b64
MESSAGE = 0x9
VALU_TRANS = 0xb # transcendental: exp, log, rcp, sqrt, sin, cos
VALU_64_SHIFT = 0xd # 64-bit shifts: lshl, lshr, ashr
VALU_MAD64 = 0xe # 64-bit multiply-add
VALU_64 = 0xf # 64-bit: add, mul, fma, rcp, sqrt, rounding, frexp, div helpers
VALUT_4 = 0xb # transcendental: exp, log, rcp, sqrt, sin, cos
VALUB_2 = 0xd # 64-bit shifts: lshl, lshr, ashr
VALUB_4 = 0xe # 64-bit multiply-add
VALUB_16 = 0xf # 64-bit: add, mul, fma, rcp, sqrt, rounding, frexp, div helpers
VINTERP = 0x12 # interpolation: v_interp_p10_f32, v_interp_p2_f32
BARRIER = 0x13
# FLAT memory ops on traced SIMD (0x1c-0x20)
FLAT_LOAD = 0x1c
FLAT_STORE = 0x1d
FLAT_STORE_64 = 0x1e
FLAT_STORE_96 = 0x1f
FLAT_STORE_128 = 0x20
FLAT_RD_2 = 0x1c
FLAT_WR_3 = 0x1d
FLAT_WR_4 = 0x1e
FLAT_WR_5 = 0x1f
FLAT_WR_6 = 0x20
# GLOBAL memory ops on traced SIMD (0x2x range)
GLOBAL_LOAD = 0x21 # saddr=SGPR, all sizes
GLOBAL_LOAD_VADDR = 0x22 # saddr=NULL, all sizes
GLOBAL_STORE = 0x24 # saddr=SGPR, 32-bit
GLOBAL_STORE_64 = 0x25 # saddr=SGPR 64 or saddr=NULL 32
GLOBAL_STORE_96 = 0x26 # saddr=SGPR 96 or saddr=NULL 64
GLOBAL_STORE_128 = 0x27 # saddr=SGPR 128 or saddr=NULL 96
GLOBAL_STORE_VADDR_128 = 0x28 # saddr=NULL, 128-bit
SGMEM_RD_1 = 0x21 # saddr=SGPR, all sizes
SGMEM_RD_2 = 0x22 # saddr=NULL, all sizes
SGMEM_WR_2 = 0x24 # saddr=SGPR, 32-bit
SGMEM_WR_3 = 0x25 # saddr=SGPR 64 or saddr=NULL 32
SGMEM_WR_4 = 0x26 # saddr=SGPR 96 or saddr=NULL 64
SGMEM_WR_5 = 0x27 # saddr=SGPR 128 or saddr=NULL 96
SGMEM_WR_6 = 0x28 # saddr=NULL, 128-bit
# LDS ops on traced SIMD
LDS_LOAD = 0x29
LDS_ATOMIC = 0x2a # ds_append, ds_consume, ds_store_addtid_b32
LDS_STORE = 0x2b
LDS_STORE_64 = 0x2c
LDS_STORE_96 = 0x2d
LDS_STORE_128 = 0x2e
LDS_RD = 0x29
LDS_WR_1 = 0x2a # ds_append, ds_consume, ds_store_addtid_b32
LDS_WR_2 = 0x2b
LDS_WR_3 = 0x2c
LDS_WR_4 = 0x2d
LDS_WR_5 = 0x2e
# Memory ops on other SIMD (0x5x range)
OTHER_LDS_LOAD = 0x50
OTHER_LDS_STORE = 0x51
OTHER_LDS_STORE_64 = 0x52
OTHER_LDS_STORE_128 = 0x54
OTHER_FLAT_LOAD = 0x55
OTHER_FLAT_STORE = 0x56
OTHER_FLAT_STORE_64 = 0x57
OTHER_FLAT_STORE_96 = 0x58
OTHER_FLAT_STORE_128 = 0x59
OTHER_GLOBAL_LOAD = 0x5a # saddr=SGPR, all sizes
OTHER_GLOBAL_LOAD_VADDR = 0x5b # saddr=NULL or saddr=SGPR store 32
OTHER_GLOBAL_STORE_64 = 0x5c # saddr=SGPR 64 or saddr=NULL 32
OTHER_GLOBAL_STORE_96 = 0x5d # saddr=SGPR 96 or saddr=NULL 64
OTHER_GLOBAL_STORE_128 = 0x5e # saddr=SGPR 128 or saddr=NULL 96
OTHER_GLOBAL_STORE_VADDR_128 = 0x5f # saddr=NULL, 128-bit
OTHER_LDS_1 = 0x50
OTHER_LDS_2 = 0x51
OTHER_LDS_3 = 0x52
OTHER_LDS_5 = 0x54
OTHER_FLAT_2 = 0x55
OTHER_FLAT_3 = 0x56
OTHER_FLAT_4 = 0x57
OTHER_FLAT_5 = 0x58
OTHER_FLAT_6 = 0x59
OTHER_VMEM_1 = 0x5a # saddr=SGPR, all sizes
OTHER_VMEM_2 = 0x5b # saddr=NULL or saddr=SGPR store 32
OTHER_VMEM_3 = 0x5c # saddr=SGPR 64 or saddr=NULL 32
OTHER_VMEM_4 = 0x5d # saddr=SGPR 96 or saddr=NULL 64
OTHER_VMEM_5 = 0x5e # saddr=SGPR 128 or saddr=NULL 96
OTHER_VMEM_6 = 0x5f # saddr=NULL, 128-bit
# EXEC-modifying ops (0x7x range)
SALU_SAVEEXEC = 0x72 # s_*_saveexec_b32/b64
VALU_CMPX = 0x73 # v_cmpx_*
SALU_WR_EXEC = 0x72 # s_*_saveexec_b32/b64
VALU1_WR_EXEC = 0x73 # v_cmpx_*
class InstOpRDNA4(Enum):
"""SQTT instruction operation types for RDNA4 (gfx1200). Different encoding from RDNA3."""

View File

@@ -349,8 +349,8 @@ def sqtt_timeline(data:bytes, lib:bytes, target:str) -> Generator[ProfileEvent,
NS_PER_TICK = 10 # 100MHz
prev_pair:tuple[int, int]|None = None # (shader, realtime)
is_cdna = target.startswith("gfx9")
dispatch_to_exec = {"WMMA":"VALU", "VALU":"VALU", "VALUINST":"VALU", "VINTERP":"VALU", "GLOBAL":"VMEM", "FLAT":"VMEM", "LDS":"LDS", "SALU":"SALU",
"SMEM":"SALU", "VMEM":"VMEM"}
dispatch_to_exec = {"WMMA":"VALU", "VALU":"VALU", "VALU1":"VALU", "VALUT":"VALU", "VALUB":"VALU", "VALUINST":"VALU", "VINTERP":"VALU",
"SGMEM":"VMEM", "FLAT":"VMEM", "LDS":"LDS", "SALU":"SALU", "SMEM":"SALU", "VMEM":"VMEM"}
def add(name:str, p:PacketType, op:str|None=None, wave:int|None=None, info:InstructionInfo|None=None) -> Generator[ProfileEvent, None, None]:
row = f"WAVE:{wave}" if (wave:=getattr(p, "wave", wave)) is not None else f"{p.__class__.__name__}:0 {name}"
if row not in row_ends: yield ProfilePointEvent(row, "JSON", "pcMap", pc_map, ts=Decimal(0))