mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
sqtt: add cycle count to rdna3 enums (#15473)
* update rdna3 sqtt enums to include cycle_count * dispatch_to_exec
This commit is contained in:
@@ -131,25 +131,25 @@ def _init_sqtt_encoder():
|
||||
SOPPOp3.S_CBRANCH_EXECZ.value, SOPPOp3.S_CBRANCH_EXECNZ.value}
|
||||
|
||||
# VALU sub-classification patterns, searched against the instruction's op name.
# Each constant is named after the InstOp member that _valu_op returns for it.
# transcendental / rounding / frexp mnemonics
_VALUT_4_RE = re.compile(r'V_(EXP|LOG|RCP|RSQ|SQRT|SIN|COS|CEIL|FLOOR|TRUNC|RNDNE|FRACT|FREXP)_')
# 64-bit shifts
_VALUB_2_RE = re.compile(r'V_(LSHLREV|LSHRREV|ASHRREV)_(B|I)64')
# 64-bit multiply-add
_VALUB_4_RE = re.compile(r'V_MAD_(U|I)64')
# any op whose name carries an _F64 suffix component
_VALUB_16_RE = re.compile(r'V_\w+_F64')

def _valu_op(op_name: str) -> InstOp|None:
  """Classify a VALU instruction name into its specialised InstOp, if it has one.

  The checks run in priority order and the first hit wins:
    1. names containing 'CMPX'          -> InstOp.VALU1_WR_EXEC
    2. 64-bit shifts (_VALUB_2_RE)      -> InstOp.VALUB_2
    3. 64-bit multiply-add (_VALUB_4_RE) -> InstOp.VALUB_4
    4. F64 ops (_VALUB_16_RE)           -> InstOp.VALUB_16
    5. transcendental ops (_VALUT_4_RE) -> InstOp.VALUT_4

  Args:
    op_name: instruction mnemonic; the patterns are upper-case and use `search`, so a match
      anywhere in the string counts.

  Returns:
    The matching InstOp member, or None when the name fits none of the sub-classes.
  """
  if 'CMPX' in op_name: return InstOp.VALU1_WR_EXEC
  if _VALUB_2_RE.search(op_name): return InstOp.VALUB_2
  if _VALUB_4_RE.search(op_name): return InstOp.VALUB_4
  # F64 must be tested before the transcendental pattern: a name such as V_RCP_F64 satisfies both,
  # and the F64 classification takes precedence.
  if _VALUB_16_RE.search(op_name): return InstOp.VALUB_16
  if _VALUT_4_RE.search(op_name): return InstOp.VALUT_4
  return None
|
||||
|
||||
def _mem_op(t, op_name: str) -> InstOp:
  """Pick the InstOp used to encode a memory instruction.

  The instruction class selects the family (LDS / SGMEM / FLAT) and the presence of "STORE" in
  the op name selects the write variant over the read variant. Anything that is not a store is
  encoded as a read.

  Args:
    t: the instruction's class; it is tested with issubclass against _DS, _GLOBAL, _FLAT and
      _SCRATCH, in that order.
    op_name: instruction mnemonic, only inspected for the substring "STORE".

  Returns:
    The InstOp member for the instruction; InstOp.SALU when `t` belongs to none of the four
    memory classes.
  """
  is_store = "STORE" in op_name
  if issubclass(t, _DS): return InstOp.LDS_WR_2 if is_store else InstOp.LDS_RD
  if issubclass(t, _GLOBAL): return InstOp.SGMEM_WR_2 if is_store else InstOp.SGMEM_RD_1
  # scratch instructions share the FLAT encodings
  if issubclass(t, _FLAT) or issubclass(t, _SCRATCH): return InstOp.FLAT_WR_3 if is_store else InstOp.FLAT_RD_2
  # fallback for any class that is not one of the memory families above
  return InstOp.SALU
|
||||
|
||||
nibbles: list[int] = []
|
||||
@@ -174,7 +174,7 @@ def _init_sqtt_encoder():
|
||||
op = _valu_op(op_name)
|
||||
if op is None: _emit_nibbles(nibbles, VALUINST, delta=1, wave=w)
|
||||
else: _emit_nibbles(nibbles, INST, delta=1, wave=w, op=op)
|
||||
elif issubclass(inst_type, _SMEM): _emit_nibbles(nibbles, INST, delta=1, wave=w, op=InstOp.SMEM)
|
||||
elif issubclass(inst_type, _SMEM): _emit_nibbles(nibbles, INST, delta=1, wave=w, op=InstOp.SMEM_RD)
|
||||
else: _emit_nibbles(nibbles, INST, delta=1, wave=w, op=_mem_op(inst_type, op_name))
|
||||
|
||||
def finish(wave_id: int):
|
||||
|
||||
@@ -44,62 +44,62 @@ class InstOp(Enum):
|
||||
OTHER_ range follows same pattern but values overlap differently.
|
||||
"""
|
||||
SALU = 0x0
|
||||
SMEM = 0x1
|
||||
SMEM_RD = 0x1
|
||||
JUMP = 0x3 # branch taken
|
||||
JUMP_NO = 0x4 # branch not taken
|
||||
CALL = 0x5 # s_call_b64
|
||||
MESSAGE = 0x9
|
||||
VALU_TRANS = 0xb # transcendental: exp, log, rcp, sqrt, sin, cos
|
||||
VALU_64_SHIFT = 0xd # 64-bit shifts: lshl, lshr, ashr
|
||||
VALU_MAD64 = 0xe # 64-bit multiply-add
|
||||
VALU_64 = 0xf # 64-bit: add, mul, fma, rcp, sqrt, rounding, frexp, div helpers
|
||||
VALUT_4 = 0xb # transcendental: exp, log, rcp, sqrt, sin, cos
|
||||
VALUB_2 = 0xd # 64-bit shifts: lshl, lshr, ashr
|
||||
VALUB_4 = 0xe # 64-bit multiply-add
|
||||
VALUB_16 = 0xf # 64-bit: add, mul, fma, rcp, sqrt, rounding, frexp, div helpers
|
||||
VINTERP = 0x12 # interpolation: v_interp_p10_f32, v_interp_p2_f32
|
||||
BARRIER = 0x13
|
||||
|
||||
# FLAT memory ops on traced SIMD (0x1x range)
|
||||
FLAT_LOAD = 0x1c
|
||||
FLAT_STORE = 0x1d
|
||||
FLAT_STORE_64 = 0x1e
|
||||
FLAT_STORE_96 = 0x1f
|
||||
FLAT_STORE_128 = 0x20
|
||||
FLAT_RD_2 = 0x1c
|
||||
FLAT_WR_3 = 0x1d
|
||||
FLAT_WR_4 = 0x1e
|
||||
FLAT_WR_5 = 0x1f
|
||||
FLAT_WR_6 = 0x20
|
||||
|
||||
# GLOBAL memory ops on traced SIMD (0x2x range)
|
||||
GLOBAL_LOAD = 0x21 # saddr=SGPR, all sizes
|
||||
GLOBAL_LOAD_VADDR = 0x22 # saddr=NULL, all sizes
|
||||
GLOBAL_STORE = 0x24 # saddr=SGPR, 32-bit
|
||||
GLOBAL_STORE_64 = 0x25 # saddr=SGPR 64 or saddr=NULL 32
|
||||
GLOBAL_STORE_96 = 0x26 # saddr=SGPR 96 or saddr=NULL 64
|
||||
GLOBAL_STORE_128 = 0x27 # saddr=SGPR 128 or saddr=NULL 96
|
||||
GLOBAL_STORE_VADDR_128 = 0x28 # saddr=NULL, 128-bit
|
||||
SGMEM_RD_1 = 0x21 # saddr=SGPR, all sizes
|
||||
SGMEM_RD_2 = 0x22 # saddr=NULL, all sizes
|
||||
SGMEM_WR_2 = 0x24 # saddr=SGPR, 32-bit
|
||||
SGMEM_WR_3 = 0x25 # saddr=SGPR 64 or saddr=NULL 32
|
||||
SGMEM_WR_4 = 0x26 # saddr=SGPR 96 or saddr=NULL 64
|
||||
SGMEM_WR_5 = 0x27 # saddr=SGPR 128 or saddr=NULL 96
|
||||
SGMEM_WR_6 = 0x28 # saddr=NULL, 128-bit
|
||||
|
||||
# LDS ops on traced SIMD
|
||||
LDS_LOAD = 0x29
|
||||
LDS_ATOMIC = 0x2a # ds_append, ds_consume, ds_store_addtid_b32
|
||||
LDS_STORE = 0x2b
|
||||
LDS_STORE_64 = 0x2c
|
||||
LDS_STORE_96 = 0x2d
|
||||
LDS_STORE_128 = 0x2e
|
||||
LDS_RD = 0x29
|
||||
LDS_WR_1 = 0x2a # ds_append, ds_consume, ds_store_addtid_b32
|
||||
LDS_WR_2 = 0x2b
|
||||
LDS_WR_3 = 0x2c
|
||||
LDS_WR_4 = 0x2d
|
||||
LDS_WR_5 = 0x2e
|
||||
|
||||
# Memory ops on other SIMD (0x5x range)
|
||||
OTHER_LDS_LOAD = 0x50
|
||||
OTHER_LDS_STORE = 0x51
|
||||
OTHER_LDS_STORE_64 = 0x52
|
||||
OTHER_LDS_STORE_128 = 0x54
|
||||
OTHER_FLAT_LOAD = 0x55
|
||||
OTHER_FLAT_STORE = 0x56
|
||||
OTHER_FLAT_STORE_64 = 0x57
|
||||
OTHER_FLAT_STORE_96 = 0x58
|
||||
OTHER_FLAT_STORE_128 = 0x59
|
||||
OTHER_GLOBAL_LOAD = 0x5a # saddr=SGPR, all sizes
|
||||
OTHER_GLOBAL_LOAD_VADDR = 0x5b # saddr=NULL or saddr=SGPR store 32
|
||||
OTHER_GLOBAL_STORE_64 = 0x5c # saddr=SGPR 64 or saddr=NULL 32
|
||||
OTHER_GLOBAL_STORE_96 = 0x5d # saddr=SGPR 96 or saddr=NULL 64
|
||||
OTHER_GLOBAL_STORE_128 = 0x5e # saddr=SGPR 128 or saddr=NULL 96
|
||||
OTHER_GLOBAL_STORE_VADDR_128 = 0x5f # saddr=NULL, 128-bit
|
||||
OTHER_LDS_1 = 0x50
|
||||
OTHER_LDS_2 = 0x51
|
||||
OTHER_LDS_3 = 0x52
|
||||
OTHER_LDS_5 = 0x54
|
||||
OTHER_FLAT_2 = 0x55
|
||||
OTHER_FLAT_3 = 0x56
|
||||
OTHER_FLAT_4 = 0x57
|
||||
OTHER_FLAT_5 = 0x58
|
||||
OTHER_FLAT_6 = 0x59
|
||||
OTHER_VMEM_1 = 0x5a # saddr=SGPR, all sizes
|
||||
OTHER_VMEM_2 = 0x5b # saddr=NULL or saddr=SGPR store 32
|
||||
OTHER_VMEM_3 = 0x5c # saddr=SGPR 64 or saddr=NULL 32
|
||||
OTHER_VMEM_4 = 0x5d # saddr=SGPR 96 or saddr=NULL 64
|
||||
OTHER_VMEM_5 = 0x5e # saddr=SGPR 128 or saddr=NULL 96
|
||||
OTHER_VMEM_6 = 0x5f # saddr=NULL, 128-bit
|
||||
|
||||
# EXEC-modifying ops (0x7x range)
|
||||
SALU_SAVEEXEC = 0x72 # s_*_saveexec_b32/b64
|
||||
VALU_CMPX = 0x73 # v_cmpx_*
|
||||
SALU_WR_EXEC = 0x72 # s_*_saveexec_b32/b64
|
||||
VALU1_WR_EXEC = 0x73 # v_cmpx_*
|
||||
|
||||
class InstOpRDNA4(Enum):
|
||||
"""SQTT instruction operation types for RDNA4 (gfx1200). Different encoding from RDNA3."""
|
||||
|
||||
@@ -349,8 +349,8 @@ def sqtt_timeline(data:bytes, lib:bytes, target:str) -> Generator[ProfileEvent,
|
||||
NS_PER_TICK = 10 # 100MHz
|
||||
prev_pair:tuple[int, int]|None = None # (shader, realtime)
|
||||
is_cdna = target.startswith("gfx9")
|
||||
dispatch_to_exec = {"WMMA":"VALU", "VALU":"VALU", "VALUINST":"VALU", "VINTERP":"VALU", "GLOBAL":"VMEM", "FLAT":"VMEM", "LDS":"LDS", "SALU":"SALU",
|
||||
"SMEM":"SALU", "VMEM":"VMEM"}
|
||||
dispatch_to_exec = {"WMMA":"VALU", "VALU":"VALU", "VALU1":"VALU", "VALUT":"VALU", "VALUB":"VALU", "VALUINST":"VALU", "VINTERP":"VALU",
|
||||
"SGMEM":"VMEM", "FLAT":"VMEM", "LDS":"LDS", "SALU":"SALU", "SMEM":"SALU", "VMEM":"VMEM"}
|
||||
def add(name:str, p:PacketType, op:str|None=None, wave:int|None=None, info:InstructionInfo|None=None) -> Generator[ProfileEvent, None, None]:
|
||||
row = f"WAVE:{wave}" if (wave:=getattr(p, "wave", wave)) is not None else f"{p.__class__.__name__}:0 {name}"
|
||||
if row not in row_ends: yield ProfilePointEvent(row, "JSON", "pcMap", pc_map, ts=Decimal(0))
|
||||
|
||||
Reference in New Issue
Block a user