assembly/amd: make pdf.py code shine (#14029)

* assembly/amd: make pdf.py code shine

* no merge

* pdf2 is the future

* something

* regen enums

* test

* work

* remove junk

* write

* pcode extraction

* pdf2 passes all tests

* simplify

* simpler pdf

* late filter

* remove hacks

* simplify pdf2.py

* field type

* remove defaults

* don't export srcenum

* simple pdf.py

* simpler

* cleaner

* less hack in PDF
This commit is contained in:
George Hotz
2026-01-05 18:49:40 -08:00
committed by GitHub
parent ea7b149ca5
commit 20653d2996
18 changed files with 4115 additions and 4035 deletions

View File

@@ -670,6 +670,10 @@ jobs:
deps: testing_minimal
amd: 'true'
python-version: '3.13'
- name: Verify AMD autogen is up to date
run: |
python -m extra.assembly.amd.pdf
git diff --exit-code extra/assembly/amd/autogen/
- name: Install LLVM 21
run: |
wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
@@ -690,23 +694,6 @@ jobs:
- name: Run RDNA3 ops tests
run: SKIP_SLOW_TEST=1 AMD=1 PYTHON_REMU=1 MOCKGPU=1 AMD_LLVM=0 pytest -n=auto test/test_ops.py -k "test_sparse_categorical_crossentropy or test_tril"
testamdautogen:
name: AMD autogen
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: rdna3-autogen
pydeps: "pdfplumber"
- name: Verify AMD autogen is up to date
run: |
python -m extra.assembly.amd.pdf --arch all
git diff --exit-code extra/assembly/amd/autogen/
testnvidia:
strategy:
fail-fast: false

View File

@@ -1,46 +1,6 @@
# autogenerated from AMD CDNA3+CDNA4 ISA PDF by pdf.py - do not edit
# autogenerated from AMD ISA PDF by pdf.py - do not edit
from enum import IntEnum
class SrcEnum(IntEnum):
S_ADD_U32 = 0
S_SUB_U32 = 1
S_ADD_I32 = 2
S_SUB_I32 = 3
S_ADDC_U32 = 4
S_SUBB_U32 = 5
S_MIN_I32 = 6
FLAT_SCRATCH_LO = 102
FLAT_SCRATCH_HI = 103
XNACK_MASK_LO = 104
XNACK_MASK_HI = 105
VCC_LO = 106
VCC_HI = 107
M0 = 124
EXEC_LO = 126
EXEC_HI = 127
ZERO = 128
DPP8 = 233
DPP8FI = 234
SHARED_BASE = 235
SHARED_LIMIT = 236
PRIVATE_BASE = 237
PRIVATE_LIMIT = 238
RESERVED = 239
POS_HALF = 240
NEG_HALF = 241
POS_ONE = 242
NEG_ONE = 243
POS_TWO = 244
NEG_TWO = 245
POS_FOUR = 246
NEG_FOUR = 247
INV_2PI = 248
DPP16 = 250
VCCZ = 251
EXECZ = 252
SCC = 253
LDS_DIRECT = 254
class DSOp(IntEnum):
DS_ADD_U32 = 0
DS_SUB_U32 = 1
@@ -155,12 +115,6 @@ class DSOp(IntEnum):
DS_READ2ST64_B64 = 120
DS_ADD_RTN_F64 = 124
DS_CONDXCHG32_RTN_B64 = 126
DS_GWS_SEMA_RELEASE_ALL = 152
DS_GWS_INIT = 153
DS_GWS_SEMA_V = 154
DS_GWS_SEMA_BR = 155
DS_GWS_SEMA_P = 156
DS_GWS_BARRIER = 157
DS_READ_ADDTID_B32 = 182
DS_PK_ADD_RTN_F16 = 183
DS_PK_ADD_RTN_BF16 = 184
@@ -174,7 +128,6 @@ class DSOp(IntEnum):
DS_READ_B64_TR_B16 = 227
DS_READ_B96 = 254
DS_READ_B128 = 255
CDNA4 = 600
class FLATOp(IntEnum):
FLAT_LOAD_UBYTE = 16
@@ -231,7 +184,6 @@ class FLATOp(IntEnum):
FLAT_ATOMIC_XOR_X2 = 106
FLAT_ATOMIC_INC_X2 = 107
FLAT_ATOMIC_DEC_X2 = 108
CDNA4 = 600
class GLOBALOp(IntEnum):
GLOBAL_LOAD_UBYTE = 16
@@ -295,7 +247,6 @@ class GLOBALOp(IntEnum):
GLOBAL_ATOMIC_DEC_X2 = 108
GLOBAL_LOAD_LDS_DWORDX4 = 125
GLOBAL_LOAD_LDS_DWORDX3 = 126
CDNA4 = 600
class MTBUFOp(IntEnum):
TBUFFER_LOAD_FORMAT_X = 0
@@ -390,7 +341,6 @@ class MUBUFOp(IntEnum):
BUFFER_ATOMIC_XOR_X2 = 106
BUFFER_ATOMIC_INC_X2 = 107
BUFFER_ATOMIC_DEC_X2 = 108
CDNA4 = 600
class SCRATCHOp(IntEnum):
SCRATCH_LOAD_UBYTE = 16
@@ -504,7 +454,6 @@ class SMEMOp(IntEnum):
S_ATOMIC_XOR_X2 = 170
S_ATOMIC_INC_X2 = 171
S_ATOMIC_DEC_X2 = 172
CDNA4 = 600
class SOP1Op(IntEnum):
S_MOV_B32 = 0
@@ -561,7 +510,6 @@ class SOP1Op(IntEnum):
S_ANDN1_WREXEC_B64 = 53
S_ANDN2_WREXEC_B64 = 54
S_BITREPLICATE_B64_B32 = 55
CDNA4 = 600
class SOP2Op(IntEnum):
S_ADD_U32 = 0
@@ -616,7 +564,6 @@ class SOP2Op(IntEnum):
S_PACK_LL_B32_B16 = 50
S_PACK_LH_B32_B16 = 51
S_PACK_HH_B32_B16 = 52
CDNA4 = 600
class SOPCOp(IntEnum):
S_CMP_EQ_I32 = 0
@@ -639,7 +586,6 @@ class SOPCOp(IntEnum):
S_SET_GPR_IDX_ON = 17
S_CMP_EQ_U64 = 18
S_CMP_LG_U64 = 19
CDNA4 = 600
class SOPKOp(IntEnum):
S_MOVK_I32 = 0
@@ -695,7 +641,6 @@ class SOPPOp(IntEnum):
S_ENDPGM_SAVED = 27
S_SET_GPR_IDX_OFF = 28
S_SET_GPR_IDX_MODE = 29
CDNA4 = 600
class VOP1Op(IntEnum):
V_NOP = 0
@@ -783,7 +728,6 @@ class VOP1Op(IntEnum):
V_PERMLANE16_SWAP_B32 = 89
V_PERMLANE32_SWAP_B32 = 90
V_CVT_F32_BF16 = 91
CDNA4 = 600
class VOP2Op(IntEnum):
V_CNDMASK_B32 = 0
@@ -848,7 +792,6 @@ class VOP2Op(IntEnum):
V_FMAC_F32 = 59
V_PK_FMAC_F16 = 60
V_XNOR_B32 = 61
CDNA4 = 600
class VOP3AOp(IntEnum):
V_CMP_CLASS_F32 = 16
@@ -1268,7 +1211,7 @@ class VOP3AOp(IntEnum):
V_CVT_SCALEF32_SR_PK32_BF6_F32 = 597
V_CVT_SCALEF32_PK32_F32_FP6 = 598
V_CVT_SCALEF32_PK32_F32_BF6 = 599
CDNA4 = 600
V_CVT_SCALEF32_PK32_FP6_F16 = 600
V_CVT_SCALEF32_PK32_FP6_BF16 = 601
V_CVT_SCALEF32_PK32_BF6_F16 = 602
V_CVT_SCALEF32_PK32_BF6_BF16 = 603
@@ -1338,7 +1281,6 @@ class VOP3BOp(IntEnum):
V_DIV_SCALE_F64 = 481
V_MAD_U64_U32 = 488
V_MAD_I64_I32 = 489
CDNA4 = 600
class VOP3POp(IntEnum):
V_PK_MAD_I16 = 0
@@ -1388,8 +1330,6 @@ class VOP3POp(IntEnum):
V_SMFMAC_F32_16X16X128_BF8_BF8 = 59
V_SMFMAC_F32_16X16X128_BF8_FP8 = 60
V_SMFMAC_F32_16X16X128_FP8_BF8 = 61
V_MFMA_F32_16X16X8_XF32 = 62
V_MFMA_F32_32X32X4_XF32 = 63
V_MFMA_F32_32X32X1_2B_F32 = 64
V_MFMA_F32_16X16X1_4B_F32 = 65
V_MFMA_F32_4X4X1_16B_F32 = 66
@@ -1447,7 +1387,6 @@ class VOP3POp(IntEnum):
V_SMFMAC_F32_32X32X32_BF8_FP8 = 125
V_SMFMAC_F32_32X32X32_FP8_BF8 = 126
V_SMFMAC_F32_32X32X32_FP8_FP8 = 127
CDNA4 = 600
class VOPCOp(IntEnum):
V_CMP_CLASS_F32 = 16
@@ -1648,4 +1587,3 @@ class VOPCOp(IntEnum):
V_CMPX_NE_U64 = 253
V_CMPX_GE_U64 = 254
V_CMPX_T_U64 = 255
CDNA4 = 600

View File

@@ -1,19 +1,18 @@
# autogenerated from AMD CDNA3+CDNA4 ISA PDF by pdf.py - do not edit
# autogenerated from AMD ISA PDF by pdf.py - do not edit
# ruff: noqa: F401,F403
from typing import Annotated
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, Inst96, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
from extra.assembly.amd.dsl import *
from extra.assembly.amd.autogen.cdna.enum import *
import functools
# instruction formats
class DPP(Inst64):
class DPP(Inst):
encoding = bits[8:0] == 0b11111010
vop_op = bits[16:9]
vdst:VGPRField = bits[24:17]
vop2_op = bits[31:25]
src0:Src = bits[39:32]
vop_op = bits[16:9]
vop2_op = bits[31:25]
dpp_ctrl = bits[48:40]
bound_ctrl = bits[51]
bc = bits[51]
src0_neg = bits[52]
src0_abs = bits[53]
src1_neg = bits[54]
@@ -21,7 +20,7 @@ class DPP(Inst64):
bank_mask = bits[59:56]
row_mask = bits[63:60]
class DS(Inst64):
class DS(Inst):
encoding = bits[31:26] == 0b110110
op:Annotated[BitField, DSOp] = bits[24:17]
vdst:VGPRField = bits[63:56]
@@ -33,7 +32,7 @@ class DS(Inst64):
gds = bits[16]
acc = bits[25]
class FLAT(Inst64):
class FLAT(Inst):
encoding = bits[31:26] == 0b110111
op:Annotated[BitField, FLATOp] = bits[24:18]
vdst:VGPRField = bits[63:56]
@@ -48,7 +47,7 @@ class FLAT(Inst64):
sc1 = bits[25]
acc = bits[55]
class MTBUF(Inst64):
class MTBUF(Inst):
encoding = bits[31:26] == 0b111010
op:Annotated[BitField, MTBUFOp] = bits[18:15]
vdata:VGPRField = bits[47:40]
@@ -56,15 +55,16 @@ class MTBUF(Inst64):
srsrc:SGPRField = bits[52:48]
soffset:SSrc = bits[63:56]
offset:Imm = bits[11:0]
format = bits[25:19]
offen = bits[12]
idxen = bits[13]
sc0 = bits[14]
dfmt = bits[22:19]
nfmt = bits[25:23]
sc1 = bits[53]
nt = bits[54]
acc = bits[55]
sc0 = bits[14]
class MUBUF(Inst64):
class MUBUF(Inst):
encoding = bits[31:26] == 0b111000
op:Annotated[BitField, MUBUFOp] = bits[24:18]
vdata:VGPRField = bits[47:40]
@@ -80,16 +80,16 @@ class MUBUF(Inst64):
nt = bits[17]
acc = bits[55]
class SDWA(Inst64):
class SDWA(Inst):
encoding = bits[8:0] == 0b11111001
vop_op = bits[16:9]
vdst:VGPRField = bits[24:17]
src0:Src = bits[39:32]
omod = bits[47:46]
clmp = bits[45]
vop_op = bits[16:9]
vop2_op = bits[31:25]
src0:Src = bits[39:32]
dst_sel = bits[42:40]
dst_u = bits[44:43]
clmp = bits[45]
omod = bits[47:46]
src0_sel = bits[50:48]
src0_sext = bits[51]
src0_neg = bits[52]
@@ -101,12 +101,10 @@ class SDWA(Inst64):
src1_abs = bits[61]
s1 = bits[63]
class SDWAB(Inst64):
class SDWAB(Inst):
sdst:SGPRField = bits[46:40]
src0:Src = bits[39:32]
dst_sel = bits[42:40]
dst_u = bits[44:43]
clmp = bits[45]
omod = bits[47:46]
sd = bits[47]
src0_sel = bits[50:48]
src0_sext = bits[51]
src0_neg = bits[52]
@@ -118,7 +116,7 @@ class SDWAB(Inst64):
src1_abs = bits[61]
s1 = bits[63]
class SMEM(Inst64):
class SMEM(Inst):
encoding = bits[31:26] == 0b110000
op:Annotated[BitField, SMEMOp] = bits[25:18]
sdata:SGPRField = bits[12:6]
@@ -128,79 +126,78 @@ class SMEM(Inst64):
glc = bits[16]
soe = bits[14]
nv = bits[15]
imm = bits[17]
imm:Imm = bits[17]
class SOP1(Inst32):
class SOP1(Inst):
encoding = bits[31:23] == 0b101111101
op:Annotated[BitField, SOP1Op] = bits[15:8]
sdst:SGPRField = bits[22:16]
ssrc0:SSrc = bits[7:0]
class SOP2(Inst32):
class SOP2(Inst):
encoding = bits[31:30] == 0b10
op:Annotated[BitField, SOP2Op] = bits[29:23]
sdst:SGPRField = bits[22:16]
ssrc0:SSrc = bits[7:0]
ssrc1:SSrc = bits[15:8]
class SOPC(Inst32):
class SOPC(Inst):
encoding = bits[31:23] == 0b101111110
op:Annotated[BitField, SOPCOp] = bits[22:16]
ssrc0:SSrc = bits[7:0]
ssrc1:SSrc = bits[15:8]
class SOPK(Inst32):
class SOPK(Inst):
encoding = bits[31:28] == 0b1011
op:Annotated[BitField, SOPKOp] = bits[27:23]
sdst:SGPRField = bits[22:16]
simm16:SImm = bits[15:0]
class SOPP(Inst32):
class SOPP(Inst):
encoding = bits[31:23] == 0b101111111
op:Annotated[BitField, SOPPOp] = bits[22:16]
simm16:SImm = bits[15:0]
class VOP1(Inst32):
encoding = bits[31:25] == 0b111111
class VOP1(Inst):
encoding = bits[31:25] == 0b0111111
op:Annotated[BitField, VOP1Op] = bits[16:9]
vdst:VGPRField = bits[24:17]
src0:Src = bits[8:0]
class VOP2(Inst32):
encoding = bits[31] == 0
class VOP2(Inst):
encoding = bits[31] == 0b0
op:Annotated[BitField, VOP2Op] = bits[30:25]
vdst:VGPRField = bits[24:17]
src0:Src = bits[8:0]
vsrc1:VGPRField = bits[16:9]
class VOP3A(Inst64):
class VOP3A(Inst):
encoding = bits[31:26] == 0b110100
vdst:VGPRField = bits[7:0]
abs = bits[10:8]
opsel = bits[14:11]
clmp = bits[15]
op:Annotated[BitField, VOP3AOp] = bits[25:16]
vdst:VGPRField = bits[7:0]
src0:Src = bits[40:32]
src1:Src = bits[49:41]
src2:Src = bits[58:50]
omod = bits[60:59]
neg = bits[63:61]
abs = bits[10:8]
clmp = bits[15]
opsel = bits[14:11]
class VOP3B(Inst64):
class VOP3B(Inst):
encoding = bits[31:26] == 0b110100
op:Annotated[BitField, VOP3BOp] = bits[25:16]
vdst:VGPRField = bits[7:0]
sdst:SGPRField = bits[14:8]
clmp = bits[15]
op:Annotated[BitField, VOP3BOp] = bits[25:16]
src0:Src = bits[40:32]
src1:Src = bits[49:41]
src2:Src = bits[58:50]
omod = bits[60:59]
neg = bits[63:61]
clmp = bits[15]
class VOP3P(Inst64):
class VOP3P(Inst):
encoding = bits[31:23] == 0b110100111
_defaults = {'opsel_hi': 3, 'opsel_hi2': 1}
op:Annotated[BitField, VOP3POp] = bits[22:16]
vdst:VGPRField = bits[7:0]
src0:Src = bits[40:32]
@@ -208,13 +205,13 @@ class VOP3P(Inst64):
src2:Src = bits[58:50]
neg = bits[63:61]
neg_hi = bits[10:8]
clmp = bits[15]
opsel = bits[13:11]
opsel_hi = bits[60:59]
clmp = bits[15]
opsel_hi2 = bits[14]
class VOPC(Inst32):
encoding = bits[31:25] == 0b111110
class VOPC(Inst):
encoding = bits[31:25] == 0b0111110
op:Annotated[BitField, VOPCOp] = bits[24:17]
src0:Src = bits[8:0]
vsrc1:VGPRField = bits[16:9]
@@ -333,12 +330,6 @@ ds_read2_b64 = functools.partial(DS, DSOp.DS_READ2_B64)
ds_read2st64_b64 = functools.partial(DS, DSOp.DS_READ2ST64_B64)
ds_add_rtn_f64 = functools.partial(DS, DSOp.DS_ADD_RTN_F64)
ds_condxchg32_rtn_b64 = functools.partial(DS, DSOp.DS_CONDXCHG32_RTN_B64)
ds_gws_sema_release_all = functools.partial(DS, DSOp.DS_GWS_SEMA_RELEASE_ALL)
ds_gws_init = functools.partial(DS, DSOp.DS_GWS_INIT)
ds_gws_sema_v = functools.partial(DS, DSOp.DS_GWS_SEMA_V)
ds_gws_sema_br = functools.partial(DS, DSOp.DS_GWS_SEMA_BR)
ds_gws_sema_p = functools.partial(DS, DSOp.DS_GWS_SEMA_P)
ds_gws_barrier = functools.partial(DS, DSOp.DS_GWS_BARRIER)
ds_read_addtid_b32 = functools.partial(DS, DSOp.DS_READ_ADDTID_B32)
ds_pk_add_rtn_f16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_F16)
ds_pk_add_rtn_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_BF16)
@@ -352,7 +343,6 @@ ds_read_b64_tr_b8 = functools.partial(DS, DSOp.DS_READ_B64_TR_B8)
ds_read_b64_tr_b16 = functools.partial(DS, DSOp.DS_READ_B64_TR_B16)
ds_read_b96 = functools.partial(DS, DSOp.DS_READ_B96)
ds_read_b128 = functools.partial(DS, DSOp.DS_READ_B128)
cdna4 = functools.partial(DS, DSOp.CDNA4)
flat_load_ubyte = functools.partial(FLAT, FLATOp.FLAT_LOAD_UBYTE)
flat_load_sbyte = functools.partial(FLAT, FLATOp.FLAT_LOAD_SBYTE)
flat_load_ushort = functools.partial(FLAT, FLATOp.FLAT_LOAD_USHORT)
@@ -407,7 +397,6 @@ flat_atomic_or_x2 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_OR_X2)
flat_atomic_xor_x2 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_XOR_X2)
flat_atomic_inc_x2 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_INC_X2)
flat_atomic_dec_x2 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_DEC_X2)
cdna4 = functools.partial(FLAT, FLATOp.CDNA4)
global_load_ubyte = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_UBYTE, seg=2)
global_load_sbyte = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_SBYTE, seg=2)
global_load_ushort = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_USHORT, seg=2)
@@ -469,7 +458,6 @@ global_atomic_inc_x2 = functools.partial(FLAT, GLOBALOp.GLOBAL_ATOMIC_INC_X2, se
global_atomic_dec_x2 = functools.partial(FLAT, GLOBALOp.GLOBAL_ATOMIC_DEC_X2, seg=2)
global_load_lds_dwordx4 = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_LDS_DWORDX4, seg=2)
global_load_lds_dwordx3 = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_LDS_DWORDX3, seg=2)
cdna4 = functools.partial(FLAT, GLOBALOp.CDNA4, seg=2)
tbuffer_load_format_x = functools.partial(MTBUF, MTBUFOp.TBUFFER_LOAD_FORMAT_X)
tbuffer_load_format_xy = functools.partial(MTBUF, MTBUFOp.TBUFFER_LOAD_FORMAT_XY)
tbuffer_load_format_xyz = functools.partial(MTBUF, MTBUFOp.TBUFFER_LOAD_FORMAT_XYZ)
@@ -560,7 +548,6 @@ buffer_atomic_or_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_OR_X2)
buffer_atomic_xor_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_XOR_X2)
buffer_atomic_inc_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_INC_X2)
buffer_atomic_dec_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_DEC_X2)
cdna4 = functools.partial(MUBUF, MUBUFOp.CDNA4)
scratch_load_ubyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_UBYTE, seg=1)
scratch_load_sbyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SBYTE, seg=1)
scratch_load_ushort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_USHORT, seg=1)
@@ -670,7 +657,6 @@ s_atomic_or_x2 = functools.partial(SMEM, SMEMOp.S_ATOMIC_OR_X2)
s_atomic_xor_x2 = functools.partial(SMEM, SMEMOp.S_ATOMIC_XOR_X2)
s_atomic_inc_x2 = functools.partial(SMEM, SMEMOp.S_ATOMIC_INC_X2)
s_atomic_dec_x2 = functools.partial(SMEM, SMEMOp.S_ATOMIC_DEC_X2)
cdna4 = functools.partial(SMEM, SMEMOp.CDNA4)
s_mov_b32 = functools.partial(SOP1, SOP1Op.S_MOV_B32)
s_mov_b64 = functools.partial(SOP1, SOP1Op.S_MOV_B64)
s_cmov_b32 = functools.partial(SOP1, SOP1Op.S_CMOV_B32)
@@ -725,7 +711,6 @@ s_orn1_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_ORN1_SAVEEXEC_B64)
s_andn1_wrexec_b64 = functools.partial(SOP1, SOP1Op.S_ANDN1_WREXEC_B64)
s_andn2_wrexec_b64 = functools.partial(SOP1, SOP1Op.S_ANDN2_WREXEC_B64)
s_bitreplicate_b64_b32 = functools.partial(SOP1, SOP1Op.S_BITREPLICATE_B64_B32)
cdna4 = functools.partial(SOP1, SOP1Op.CDNA4)
s_add_u32 = functools.partial(SOP2, SOP2Op.S_ADD_U32)
s_sub_u32 = functools.partial(SOP2, SOP2Op.S_SUB_U32)
s_add_i32 = functools.partial(SOP2, SOP2Op.S_ADD_I32)
@@ -778,7 +763,6 @@ s_lshl4_add_u32 = functools.partial(SOP2, SOP2Op.S_LSHL4_ADD_U32)
s_pack_ll_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_LL_B32_B16)
s_pack_lh_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_LH_B32_B16)
s_pack_hh_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_HH_B32_B16)
cdna4 = functools.partial(SOP2, SOP2Op.CDNA4)
s_cmp_eq_i32 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_I32)
s_cmp_lg_i32 = functools.partial(SOPC, SOPCOp.S_CMP_LG_I32)
s_cmp_gt_i32 = functools.partial(SOPC, SOPCOp.S_CMP_GT_I32)
@@ -799,7 +783,6 @@ s_setvskip = functools.partial(SOPC, SOPCOp.S_SETVSKIP)
s_set_gpr_idx_on = functools.partial(SOPC, SOPCOp.S_SET_GPR_IDX_ON)
s_cmp_eq_u64 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_U64)
s_cmp_lg_u64 = functools.partial(SOPC, SOPCOp.S_CMP_LG_U64)
cdna4 = functools.partial(SOPC, SOPCOp.CDNA4)
s_movk_i32 = functools.partial(SOPK, SOPKOp.S_MOVK_I32)
s_cmovk_i32 = functools.partial(SOPK, SOPKOp.S_CMOVK_I32)
s_cmpk_eq_i32 = functools.partial(SOPK, SOPKOp.S_CMPK_EQ_I32)
@@ -851,7 +834,6 @@ s_cbranch_cdbgsys_and_user = functools.partial(SOPP, SOPPOp.S_CBRANCH_CDBGSYS_AN
s_endpgm_saved = functools.partial(SOPP, SOPPOp.S_ENDPGM_SAVED)
s_set_gpr_idx_off = functools.partial(SOPP, SOPPOp.S_SET_GPR_IDX_OFF)
s_set_gpr_idx_mode = functools.partial(SOPP, SOPPOp.S_SET_GPR_IDX_MODE)
cdna4 = functools.partial(SOPP, SOPPOp.CDNA4)
v_nop_e32 = functools.partial(VOP1, VOP1Op.V_NOP)
v_mov_b32_e32 = functools.partial(VOP1, VOP1Op.V_MOV_B32)
v_readfirstlane_b32_e32 = functools.partial(VOP1, VOP1Op.V_READFIRSTLANE_B32)
@@ -937,7 +919,6 @@ v_prng_b32_e32 = functools.partial(VOP1, VOP1Op.V_PRNG_B32)
v_permlane16_swap_b32_e32 = functools.partial(VOP1, VOP1Op.V_PERMLANE16_SWAP_B32)
v_permlane32_swap_b32_e32 = functools.partial(VOP1, VOP1Op.V_PERMLANE32_SWAP_B32)
v_cvt_f32_bf16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_BF16)
cdna4_e32 = functools.partial(VOP1, VOP1Op.CDNA4)
v_cndmask_b32_e32 = functools.partial(VOP2, VOP2Op.V_CNDMASK_B32)
v_add_f32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_F32)
v_sub_f32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_F32)
@@ -961,8 +942,8 @@ v_and_b32_e32 = functools.partial(VOP2, VOP2Op.V_AND_B32)
v_or_b32_e32 = functools.partial(VOP2, VOP2Op.V_OR_B32)
v_xor_b32_e32 = functools.partial(VOP2, VOP2Op.V_XOR_B32)
v_dot2c_f32_bf16_e32 = functools.partial(VOP2, VOP2Op.V_DOT2C_F32_BF16)
def v_fmamk_f32_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F32, vdst, src0, vsrc1, literal=K)
def v_fmaak_f32_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F32, vdst, src0, vsrc1, literal=K)
v_fmamk_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F32)
v_fmaak_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F32)
v_add_co_u32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_CO_U32)
v_sub_co_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_CO_U32)
v_subrev_co_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_CO_U32)
@@ -1000,7 +981,6 @@ v_dot8c_i32_i4_e32 = functools.partial(VOP2, VOP2Op.V_DOT8C_I32_I4)
v_fmac_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F32)
v_pk_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_PK_FMAC_F16)
v_xnor_b32_e32 = functools.partial(VOP2, VOP2Op.V_XNOR_B32)
cdna4_e32 = functools.partial(VOP2, VOP2Op.CDNA4)
v_cmp_class_f32 = functools.partial(VOP3A, VOP3AOp.V_CMP_CLASS_F32)
v_cmpx_class_f32 = functools.partial(VOP3A, VOP3AOp.V_CMPX_CLASS_F32)
v_cmp_class_f64 = functools.partial(VOP3A, VOP3AOp.V_CMP_CLASS_F64)
@@ -1418,7 +1398,7 @@ v_cvt_scalef32_sr_pk32_fp6_f32 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32
v_cvt_scalef32_sr_pk32_bf6_f32 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_F32)
v_cvt_scalef32_pk32_f32_fp6 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_F32_FP6)
v_cvt_scalef32_pk32_f32_bf6 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_F32_BF6)
cdna4 = functools.partial(VOP3A, VOP3AOp.CDNA4)
v_cvt_scalef32_pk32_fp6_f16 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_FP6_F16)
v_cvt_scalef32_pk32_fp6_bf16 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_FP6_BF16)
v_cvt_scalef32_pk32_bf6_f16 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_BF6_F16)
v_cvt_scalef32_pk32_bf6_bf16 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_BF6_BF16)
@@ -1486,7 +1466,6 @@ v_div_scale_f32 = functools.partial(VOP3B, VOP3BOp.V_DIV_SCALE_F32)
v_div_scale_f64 = functools.partial(VOP3B, VOP3BOp.V_DIV_SCALE_F64)
v_mad_u64_u32 = functools.partial(VOP3B, VOP3BOp.V_MAD_U64_U32)
v_mad_i64_i32 = functools.partial(VOP3B, VOP3BOp.V_MAD_I64_I32)
cdna4 = functools.partial(VOP3B, VOP3BOp.CDNA4)
v_pk_mad_i16 = functools.partial(VOP3P, VOP3POp.V_PK_MAD_I16)
v_pk_mul_lo_u16 = functools.partial(VOP3P, VOP3POp.V_PK_MUL_LO_U16)
v_pk_add_i16 = functools.partial(VOP3P, VOP3POp.V_PK_ADD_I16)
@@ -1534,8 +1513,6 @@ v_smfmac_i32_16x16x128_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_16X16X
v_smfmac_f32_16x16x128_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8)
v_smfmac_f32_16x16x128_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8)
v_smfmac_f32_16x16x128_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8)
v_mfma_f32_16x16x8_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X8_XF32)
v_mfma_f32_32x32x4_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X4_XF32)
v_mfma_f32_32x32x1_2b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X1_2B_F32)
v_mfma_f32_16x16x1_4b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X1_4B_F32)
v_mfma_f32_4x4x1_16b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_4X4X1_16B_F32)
@@ -1593,7 +1570,6 @@ v_smfmac_f32_32x32x32_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32
v_smfmac_f32_32x32x32_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_FP8)
v_smfmac_f32_32x32x32_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_BF8)
v_smfmac_f32_32x32x32_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_FP8)
cdna4 = functools.partial(VOP3P, VOP3POp.CDNA4)
v_cmp_class_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_CLASS_F32)
v_cmpx_class_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_CLASS_F32)
v_cmp_class_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_CLASS_F64)
@@ -1791,42 +1767,4 @@ v_cmpx_le_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_U64)
v_cmpx_gt_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_U64)
v_cmpx_ne_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NE_U64)
v_cmpx_ge_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_U64)
v_cmpx_t_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_T_U64)
cdna4_e32 = functools.partial(VOPC, VOPCOp.CDNA4)
S_ADD_U32 = SrcEnum.S_ADD_U32
S_SUB_U32 = SrcEnum.S_SUB_U32
S_ADD_I32 = SrcEnum.S_ADD_I32
S_SUB_I32 = SrcEnum.S_SUB_I32
S_ADDC_U32 = SrcEnum.S_ADDC_U32
S_SUBB_U32 = SrcEnum.S_SUBB_U32
S_MIN_I32 = SrcEnum.S_MIN_I32
FLAT_SCRATCH_LO = SrcEnum.FLAT_SCRATCH_LO
FLAT_SCRATCH_HI = SrcEnum.FLAT_SCRATCH_HI
XNACK_MASK_LO = SrcEnum.XNACK_MASK_LO
XNACK_MASK_HI = SrcEnum.XNACK_MASK_HI
VCC_LO = SrcEnum.VCC_LO
VCC_HI = SrcEnum.VCC_HI
M0 = SrcEnum.M0
EXEC_LO = SrcEnum.EXEC_LO
EXEC_HI = SrcEnum.EXEC_HI
ZERO = SrcEnum.ZERO
DPP8FI = SrcEnum.DPP8FI
SHARED_BASE = SrcEnum.SHARED_BASE
SHARED_LIMIT = SrcEnum.SHARED_LIMIT
PRIVATE_BASE = SrcEnum.PRIVATE_BASE
PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
RESERVED = SrcEnum.RESERVED
POS_HALF = SrcEnum.POS_HALF
NEG_HALF = SrcEnum.NEG_HALF
POS_ONE = SrcEnum.POS_ONE
NEG_ONE = SrcEnum.NEG_ONE
POS_TWO = SrcEnum.POS_TWO
NEG_TWO = SrcEnum.NEG_TWO
POS_FOUR = SrcEnum.POS_FOUR
NEG_FOUR = SrcEnum.NEG_FOUR
INV_2PI = SrcEnum.INV_2PI
VCCZ = SrcEnum.VCCZ
EXECZ = SrcEnum.EXECZ
SCC = SrcEnum.SCC
LDS_DIRECT = SrcEnum.LDS_DIRECT
v_cmpx_t_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_T_U64)

File diff suppressed because one or more lines are too long

View File

@@ -1,34 +1,97 @@
# autogenerated from AMD RDNA3.5 ISA PDF by pdf.py - do not edit
# autogenerated from AMD ISA PDF by pdf.py - do not edit
from enum import IntEnum
class SrcEnum(IntEnum):
VCC_LO = 106
VCC_HI = 107
NULL = 124
M0 = 125
EXEC_LO = 126
EXEC_HI = 127
ZERO = 128
DPP8 = 233
DPP8FI = 234
SHARED_BASE = 235
SHARED_LIMIT = 236
PRIVATE_BASE = 237
PRIVATE_LIMIT = 238
POS_HALF = 240
NEG_HALF = 241
POS_ONE = 242
NEG_ONE = 243
POS_TWO = 244
NEG_TWO = 245
POS_FOUR = 246
NEG_FOUR = 247
INV_2PI = 248
DPP16 = 250
VCCZ = 251
EXECZ = 252
SCC = 253
LDS_DIRECT = 254
class BufFmt(IntEnum):
BUF_FMT_8_UNORM = 1
BUF_FMT_8_SNORM = 2
BUF_FMT_8_USCALED = 3
BUF_FMT_8_SSCALED = 4
BUF_FMT_8_UINT = 5
BUF_FMT_8_SINT = 6
BUF_FMT_16_UNORM = 7
BUF_FMT_16_SNORM = 8
BUF_FMT_16_USCALED = 9
BUF_FMT_16_SSCALED = 10
BUF_FMT_16_UINT = 11
BUF_FMT_16_SINT = 12
BUF_FMT_16_FLOAT = 13
BUF_FMT_8_8_UNORM = 14
BUF_FMT_8_8_SNORM = 15
BUF_FMT_8_8_USCALED = 16
BUF_FMT_8_8_SSCALED = 17
BUF_FMT_8_8_UINT = 18
BUF_FMT_8_8_SINT = 19
BUF_FMT_32_UINT = 20
BUF_FMT_32_SINT = 21
BUF_FMT_32_FLOAT = 22
BUF_FMT_16_16_UNORM = 23
BUF_FMT_16_16_SNORM = 24
BUF_FMT_16_16_USCALED = 25
BUF_FMT_16_16_SSCALED = 26
BUF_FMT_16_16_UINT = 27
BUF_FMT_16_16_SINT = 28
BUF_FMT_16_16_FLOAT = 29
BUF_FMT_10_11_11_FLOAT = 30
BUF_FMT_11_11_10_FLOAT = 31
BUF_FMT_10_10_10_2_UNORM = 32
BUF_FMT_10_10_10_2_SNORM = 33
BUF_FMT_10_10_10_2_UINT = 34
BUF_FMT_10_10_10_2_SINT = 35
BUF_FMT_2_10_10_10_UNORM = 36
BUF_FMT_2_10_10_10_SNORM = 37
BUF_FMT_2_10_10_10_USCALED = 38
BUF_FMT_2_10_10_10_SSCALED = 39
BUF_FMT_2_10_10_10_UINT = 40
BUF_FMT_2_10_10_10_SINT = 41
BUF_FMT_8_8_8_8_UNORM = 42
BUF_FMT_8_8_8_8_SNORM = 43
BUF_FMT_8_8_8_8_USCALED = 44
BUF_FMT_8_8_8_8_SSCALED = 45
BUF_FMT_8_8_8_8_UINT = 46
BUF_FMT_8_8_8_8_SINT = 47
BUF_FMT_32_32_UINT = 48
BUF_FMT_32_32_SINT = 49
BUF_FMT_32_32_FLOAT = 50
BUF_FMT_16_16_16_16_UNORM = 51
BUF_FMT_16_16_16_16_SNORM = 52
BUF_FMT_16_16_16_16_USCALED = 53
BUF_FMT_16_16_16_16_SSCALED = 54
BUF_FMT_16_16_16_16_UINT = 55
BUF_FMT_16_16_16_16_SINT = 56
BUF_FMT_16_16_16_16_FLOAT = 57
BUF_FMT_32_32_32_UINT = 58
BUF_FMT_32_32_32_SINT = 59
BUF_FMT_32_32_32_FLOAT = 60
BUF_FMT_32_32_32_32_UINT = 61
BUF_FMT_8_SRGB = 64
BUF_FMT_8_8_SRGB = 65
BUF_FMT_8_8_8_8_SRGB = 66
BUF_FMT_5_9_9_9_FLOAT = 67
BUF_FMT_5_6_5_UNORM = 68
BUF_FMT_1_5_5_5_UNORM = 69
BUF_FMT_5_5_5_1_UNORM = 70
BUF_FMT_4_4_4_4_UNORM = 71
BUF_FMT_4_4_UNORM = 72
BUF_FMT_1_UNORM = 73
BUF_FMT_1_REVERSED_UNORM = 74
BUF_FMT_32_FLOAT_CLAMP = 75
BUF_FMT_8_24_UNORM = 76
BUF_FMT_8_24_UINT = 77
BUF_FMT_24_8_UNORM = 78
BUF_FMT_24_8_UINT = 79
BUF_FMT_X24_8_32_UINT = 80
BUF_FMT_X24_8_32_FLOAT = 81
BUF_FMT_GB_GR_UNORM = 82
BUF_FMT_GB_GR_SNORM = 83
BUF_FMT_GB_GR_UINT = 84
BUF_FMT_GB_GR_SRGB = 85
BUF_FMT_BG_RG_UNORM = 86
BUF_FMT_BG_RG_SNORM = 87
BUF_FMT_BG_RG_UINT = 88
BUF_FMT_BG_RG_SRGB = 89
BUF_FMT_BC1_UNORM = 109
BUF_FMT_BC1_SRGB = 110
BUF_FMT_BC2_UNORM = 111
class DSOp(IntEnum):
DS_ADD_U32 = 0
@@ -1372,7 +1435,6 @@ class VOP3POp(IntEnum):
V_WMMA_I32_16X16X16_IU4 = 69
class VOP3SDOp(IntEnum):
DWORD = 1
V_ADD_CO_CI_U32 = 288
V_SUB_CO_CI_U32 = 289
V_SUBREV_CO_CI_U32 = 290
@@ -1594,68 +1656,3 @@ class VOPDOp(IntEnum):
V_DUAL_ADD_NC_U32 = 16
V_DUAL_LSHLREV_B32 = 17
V_DUAL_AND_B32 = 18
class BufFmt(IntEnum):
BUF_FMT_8_UNORM = 1
BUF_FMT_8_SNORM = 2
BUF_FMT_8_USCALED = 3
BUF_FMT_8_SSCALED = 4
BUF_FMT_8_UINT = 5
BUF_FMT_8_SINT = 6
BUF_FMT_16_UNORM = 7
BUF_FMT_16_SNORM = 8
BUF_FMT_16_USCALED = 9
BUF_FMT_16_SSCALED = 10
BUF_FMT_16_UINT = 11
BUF_FMT_16_SINT = 12
BUF_FMT_16_FLOAT = 13
BUF_FMT_8_8_UNORM = 14
BUF_FMT_8_8_SNORM = 15
BUF_FMT_8_8_USCALED = 16
BUF_FMT_8_8_SSCALED = 17
BUF_FMT_8_8_UINT = 18
BUF_FMT_8_8_SINT = 19
BUF_FMT_32_UINT = 20
BUF_FMT_32_SINT = 21
BUF_FMT_32_FLOAT = 22
BUF_FMT_16_16_UNORM = 23
BUF_FMT_16_16_SNORM = 24
BUF_FMT_16_16_USCALED = 25
BUF_FMT_16_16_SSCALED = 26
BUF_FMT_16_16_UINT = 27
BUF_FMT_16_16_SINT = 28
BUF_FMT_16_16_FLOAT = 29
BUF_FMT_10_11_11_FLOAT = 30
BUF_FMT_11_11_10_FLOAT = 31
BUF_FMT_10_10_10_2_UNORM = 32
BUF_FMT_10_10_10_2_SNORM = 33
BUF_FMT_10_10_10_2_UINT = 34
BUF_FMT_10_10_10_2_SINT = 35
BUF_FMT_2_10_10_10_UNORM = 36
BUF_FMT_2_10_10_10_SNORM = 37
BUF_FMT_2_10_10_10_USCALED = 38
BUF_FMT_2_10_10_10_SSCALED = 39
BUF_FMT_2_10_10_10_UINT = 40
BUF_FMT_2_10_10_10_SINT = 41
BUF_FMT_8_8_8_8_UNORM = 42
BUF_FMT_8_8_8_8_SNORM = 43
BUF_FMT_8_8_8_8_USCALED = 44
BUF_FMT_8_8_8_8_SSCALED = 45
BUF_FMT_8_8_8_8_UINT = 46
BUF_FMT_8_8_8_8_SINT = 47
BUF_FMT_32_32_UINT = 48
BUF_FMT_32_32_SINT = 49
BUF_FMT_32_32_FLOAT = 50
BUF_FMT_16_16_16_16_UNORM = 51
BUF_FMT_16_16_16_16_SNORM = 52
BUF_FMT_16_16_16_16_USCALED = 53
BUF_FMT_16_16_16_16_SSCALED = 54
BUF_FMT_16_16_16_16_UINT = 55
BUF_FMT_16_16_16_16_SINT = 56
BUF_FMT_16_16_16_16_FLOAT = 57
BUF_FMT_32_32_32_UINT = 58
BUF_FMT_32_32_32_SINT = 59
BUF_FMT_32_32_32_FLOAT = 60
BUF_FMT_32_32_32_32_UINT = 61
BUF_FMT_32_32_32_32_SINT = 62
BUF_FMT_32_32_32_32_FLOAT = 63

View File

@@ -1,12 +1,11 @@
# autogenerated from AMD RDNA3.5 ISA PDF by pdf.py - do not edit
# autogenerated from AMD ISA PDF by pdf.py - do not edit
# ruff: noqa: F401,F403
from typing import Annotated
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, Inst96, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
from extra.assembly.amd.dsl import *
from extra.assembly.amd.autogen.rdna3.enum import *
import functools
# instruction formats
class DPP16(Inst64):
class DPP16(Inst):
src0:Src = bits[39:32]
dpp_ctrl = bits[48:40]
fi = bits[50]
@@ -18,7 +17,7 @@ class DPP16(Inst64):
bank_mask = bits[59:56]
row_mask = bits[63:60]
class DPP8(Inst64):
class DPP8(Inst):
src0:Src = bits[39:32]
lane_sel0 = bits[42:40]
lane_sel1 = bits[45:43]
@@ -29,7 +28,7 @@ class DPP8(Inst64):
lane_sel6 = bits[60:58]
lane_sel7 = bits[63:61]
class DS(Inst64):
class DS(Inst):
encoding = bits[31:26] == 0b110110
op:Annotated[BitField, DSOp] = bits[25:18]
vdst:VGPRField = bits[63:56]
@@ -40,18 +39,18 @@ class DS(Inst64):
offset1 = bits[15:8]
gds = bits[17]
class EXP(Inst64):
class EXP(Inst):
encoding = bits[31:26] == 0b111110
vsrc0:VGPRField = bits[39:32]
vsrc1:VGPRField = bits[47:40]
vsrc2:VGPRField = bits[55:48]
vsrc3:VGPRField = bits[63:56]
en = bits[3:0]
target = bits[9:4]
vsrc0 = bits[39:32]
vsrc1:VGPRField = bits[47:40]
vsrc2 = bits[55:48]
vsrc3 = bits[63:56]
done = bits[11]
row = bits[13]
class FLAT(Inst64):
class FLAT(Inst):
encoding = bits[31:26] == 0b110111
op:Annotated[BitField, FLATOp] = bits[24:18]
vdst:VGPRField = bits[63:56]
@@ -60,12 +59,12 @@ class FLAT(Inst64):
saddr:SSrc = bits[54:48]
offset:Imm = bits[12:0]
seg = bits[17:16]
dlc = bits[13]
glc = bits[14]
dlc = bits[13]
slc = bits[15]
sve = bits[55]
class LDSDIR(Inst32):
class LDSDIR(Inst):
encoding = bits[31:24] == 0b11001110
op = bits[21:20]
vdst:VGPRField = bits[7:0]
@@ -73,29 +72,29 @@ class LDSDIR(Inst32):
attr_chan = bits[9:8]
wait_va = bits[19:16]
class MIMG(Inst64):
class MIMG(Inst):
encoding = bits[31:26] == 0b111100
op:Annotated[BitField, MIMGOp] = bits[25:18]
vdata:VGPRField = bits[47:40]
vaddr:VGPRField = bits[39:32]
srsrc:SGPRField = bits[52:48]
ssamp = bits[62:58]
ssamp:SGPRField = bits[62:58]
dmask = bits[11:8]
dim = bits[4:2]
unrm = bits[7]
dlc = bits[13]
glc = bits[14]
dlc = bits[13]
slc = bits[12]
tfe = bits[53]
unrm = bits[7]
nsa = bits[0]
r128 = bits[15]
a16 = bits[16]
d16 = bits[17]
tfe = bits[53]
lwe = bits[54]
addr1 = bits[71:64]
addr2 = bits[79:72]
class MTBUF(Inst64):
class MTBUF(Inst):
encoding = bits[31:26] == 0b111010
op:Annotated[BitField, MTBUFOp] = bits[18:15]
vdata:VGPRField = bits[47:40]
@@ -111,7 +110,7 @@ class MTBUF(Inst64):
slc = bits[12]
tfe = bits[53]
class MUBUF(Inst64):
class MUBUF(Inst):
encoding = bits[31:26] == 0b111000
op:Annotated[BitField, MUBUFOp] = bits[25:18]
vdata:VGPRField = bits[47:40]
@@ -126,7 +125,7 @@ class MUBUF(Inst64):
slc = bits[12]
tfe = bits[53]
class SMEM(Inst64):
class SMEM(Inst):
encoding = bits[31:26] == 0b111101
op:Annotated[BitField, SMEMOp] = bits[25:18]
sdata:SGPRField = bits[12:6]
@@ -136,62 +135,63 @@ class SMEM(Inst64):
glc = bits[14]
dlc = bits[13]
class SOP1(Inst32):
class SOP1(Inst):
encoding = bits[31:23] == 0b101111101
op:Annotated[BitField, SOP1Op] = bits[15:8]
sdst:SGPRField = bits[22:16]
ssrc0:SSrc = bits[7:0]
class SOP2(Inst32):
class SOP2(Inst):
encoding = bits[31:30] == 0b10
op:Annotated[BitField, SOP2Op] = bits[29:23]
sdst:SGPRField = bits[22:16]
ssrc0:SSrc = bits[7:0]
ssrc1:SSrc = bits[15:8]
class SOPC(Inst32):
class SOPC(Inst):
encoding = bits[31:23] == 0b101111110
op:Annotated[BitField, SOPCOp] = bits[22:16]
ssrc0:SSrc = bits[7:0]
ssrc1:SSrc = bits[15:8]
class SOPK(Inst32):
class SOPK(Inst):
encoding = bits[31:28] == 0b1011
op:Annotated[BitField, SOPKOp] = bits[27:23]
sdst:SGPRField = bits[22:16]
simm16:SImm = bits[15:0]
class SOPP(Inst32):
class SOPP(Inst):
encoding = bits[31:23] == 0b101111111
op:Annotated[BitField, SOPPOp] = bits[22:16]
simm16:SImm = bits[15:0]
class VINTERP(Inst64):
class VINTERP(Inst):
encoding = bits[31:24] == 0b11001101
op:Annotated[BitField, VINTERPOp] = bits[22:16]
vdst:VGPRField = bits[7:0]
src0:Src = bits[40:32]
src0:Src = bits[40:32]
src1:Src = bits[49:41]
src2:Src = bits[58:50]
waitexp = bits[10:8]
neg = bits[63:61]
clmp = bits[15]
opsel = bits[14:11]
neg = bits[63:61]
waitexp = bits[10:8]
class VOP1(Inst32):
encoding = bits[31:25] == 0b111111
class VOP1(Inst):
encoding = bits[31:25] == 0b0111111
op:Annotated[BitField, VOP1Op] = bits[16:9]
vdst:VGPRField = bits[24:17]
src0:Src = bits[8:0]
class VOP2(Inst32):
encoding = bits[31] == 0
class VOP2(Inst):
encoding = bits[31] == 0b0
op:Annotated[BitField, VOP2Op] = bits[30:25]
vdst:VGPRField = bits[24:17]
src0:Src = bits[8:0]
vsrc1:VGPRField = bits[16:9]
class VOP3(Inst64):
class VOP3(Inst):
encoding = bits[31:26] == 0b110101
op:Annotated[BitField, VOP3Op] = bits[25:16]
vdst:VGPRField = bits[7:0]
@@ -204,9 +204,8 @@ class VOP3(Inst64):
clmp = bits[15]
opsel = bits[14:11]
class VOP3P(Inst64):
class VOP3P(Inst):
encoding = bits[31:24] == 0b11001100
_defaults = {'opsel_hi': 3, 'opsel_hi2': 1}
op:Annotated[BitField, VOP3POp] = bits[22:16]
vdst:VGPRField = bits[7:0]
src0:Src = bits[40:32]
@@ -214,12 +213,12 @@ class VOP3P(Inst64):
src2:Src = bits[58:50]
neg = bits[63:61]
neg_hi = bits[10:8]
clmp = bits[15]
opsel = bits[13:11]
opsel_hi = bits[60:59]
clmp = bits[15]
opsel_hi2 = bits[14]
class VOP3SD(Inst64):
class VOP3SD(Inst):
encoding = bits[31:26] == 0b110101
op:Annotated[BitField, VOP3SDOp] = bits[25:16]
vdst:VGPRField = bits[7:0]
@@ -227,26 +226,26 @@ class VOP3SD(Inst64):
src0:Src = bits[40:32]
src1:Src = bits[49:41]
src2:Src = bits[58:50]
clmp = bits[15]
omod = bits[60:59]
neg = bits[63:61]
clmp = bits[15]
class VOPC(Inst32):
encoding = bits[31:25] == 0b111110
class VOPC(Inst):
encoding = bits[31:25] == 0b0111110
op:Annotated[BitField, VOPCOp] = bits[24:17]
src0:Src = bits[8:0]
vsrc1:VGPRField = bits[16:9]
class VOPD(Inst64):
class VOPD(Inst):
encoding = bits[31:26] == 0b110010
opx:Annotated[BitField, VOPDOp] = bits[25:22]
opy:Annotated[BitField, VOPDOp] = bits[21:17]
vdstx:VGPRField = bits[63:56]
vdstx = bits[63:56]
vdsty:VDSTYEnc = bits[55:49]
srcx0:Src = bits[8:0]
vsrcx1:VGPRField = bits[16:9]
srcy0:Src = bits[40:32]
vsrcy1:VGPRField = bits[48:41]
vsrcx1 = bits[16:9]
vsrcy1 = bits[48:41]
# instruction helpers
ds_add_u32 = functools.partial(DS, DSOp.DS_ADD_U32)
@@ -1077,16 +1076,16 @@ v_add_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_NC_U32)
v_sub_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_NC_U32)
v_subrev_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_NC_U32)
v_fmac_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F32)
def v_fmamk_f32_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F32, vdst, src0, vsrc1, literal=K)
def v_fmaak_f32_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F32, vdst, src0, vsrc1, literal=K)
v_fmamk_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F32)
v_fmaak_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F32)
v_cvt_pk_rtz_f16_f32_e32 = functools.partial(VOP2, VOP2Op.V_CVT_PK_RTZ_F16_F32)
v_add_f16_e32 = functools.partial(VOP2, VOP2Op.V_ADD_F16)
v_sub_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUB_F16)
v_subrev_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_F16)
v_mul_f16_e32 = functools.partial(VOP2, VOP2Op.V_MUL_F16)
v_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F16)
def v_fmamk_f16_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F16, vdst, src0, vsrc1, literal=K)
def v_fmaak_f16_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F16, vdst, src0, vsrc1, literal=K)
v_fmamk_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F16)
v_fmaak_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F16)
v_max_f16_e32 = functools.partial(VOP2, VOP2Op.V_MAX_F16)
v_min_f16_e32 = functools.partial(VOP2, VOP2Op.V_MIN_F16)
v_ldexp_f16_e32 = functools.partial(VOP2, VOP2Op.V_LDEXP_F16)
@@ -1554,7 +1553,6 @@ v_wmma_f16_16x16x16_f16 = functools.partial(VOP3P, VOP3POp.V_WMMA_F16_16X16X16_F
v_wmma_bf16_16x16x16_bf16 = functools.partial(VOP3P, VOP3POp.V_WMMA_BF16_16X16X16_BF16)
v_wmma_i32_16x16x16_iu8 = functools.partial(VOP3P, VOP3POp.V_WMMA_I32_16X16X16_IU8)
v_wmma_i32_16x16x16_iu4 = functools.partial(VOP3P, VOP3POp.V_WMMA_I32_16X16X16_IU4)
dword = functools.partial(VOP3SD, VOP3SDOp.DWORD)
v_add_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_ADD_CO_CI_U32)
v_sub_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUB_CO_CI_U32)
v_subrev_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUBREV_CO_CI_U32)
@@ -1771,31 +1769,4 @@ v_dual_dot2acc_f32_f16 = functools.partial(VOPD, VOPDOp.V_DUAL_DOT2ACC_F32_F16)
v_dual_dot2acc_f32_bf16 = functools.partial(VOPD, VOPDOp.V_DUAL_DOT2ACC_F32_BF16)
v_dual_add_nc_u32 = functools.partial(VOPD, VOPDOp.V_DUAL_ADD_NC_U32)
v_dual_lshlrev_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_LSHLREV_B32)
v_dual_and_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_AND_B32)
VCC_LO = SrcEnum.VCC_LO
VCC_HI = SrcEnum.VCC_HI
NULL = SrcEnum.NULL
M0 = SrcEnum.M0
EXEC_LO = SrcEnum.EXEC_LO
EXEC_HI = SrcEnum.EXEC_HI
ZERO = SrcEnum.ZERO
DPP8FI = SrcEnum.DPP8FI
SHARED_BASE = SrcEnum.SHARED_BASE
SHARED_LIMIT = SrcEnum.SHARED_LIMIT
PRIVATE_BASE = SrcEnum.PRIVATE_BASE
PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
POS_HALF = SrcEnum.POS_HALF
NEG_HALF = SrcEnum.NEG_HALF
POS_ONE = SrcEnum.POS_ONE
NEG_ONE = SrcEnum.NEG_ONE
POS_TWO = SrcEnum.POS_TWO
NEG_TWO = SrcEnum.NEG_TWO
POS_FOUR = SrcEnum.POS_FOUR
NEG_FOUR = SrcEnum.NEG_FOUR
INV_2PI = SrcEnum.INV_2PI
VCCZ = SrcEnum.VCCZ
EXECZ = SrcEnum.EXECZ
SCC = SrcEnum.SCC
LDS_DIRECT = SrcEnum.LDS_DIRECT
OFF = NULL
v_dual_and_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_AND_B32)

File diff suppressed because it is too large Load Diff

View File

@@ -1,34 +1,100 @@
# autogenerated from AMD RDNA4 ISA PDF by pdf.py - do not edit
# autogenerated from AMD ISA PDF by pdf.py - do not edit
from enum import IntEnum
class SrcEnum(IntEnum):
VCC_LO = 106
VCC_HI = 107
NULL = 124
M0 = 125
EXEC_LO = 126
EXEC_HI = 127
ZERO = 128
DPP8 = 233
DPP8FI = 234
SHARED_BASE = 235
SHARED_LIMIT = 236
PRIVATE_BASE = 237
PRIVATE_LIMIT = 238
POS_HALF = 240
NEG_HALF = 241
POS_ONE = 242
NEG_ONE = 243
POS_TWO = 244
NEG_TWO = 245
POS_FOUR = 246
NEG_FOUR = 247
INV_2PI = 248
DPP16 = 250
VCCZ = 251
EXECZ = 252
SCC = 253
LDS_DIRECT = 254
class BufFmt(IntEnum):
BUF_FMT_8_UNORM = 1
BUF_FMT_8_SNORM = 2
BUF_FMT_8_USCALED = 3
BUF_FMT_8_SSCALED = 4
BUF_FMT_8_UINT = 5
BUF_FMT_8_SINT = 6
BUF_FMT_16_UNORM = 7
BUF_FMT_16_SNORM = 8
BUF_FMT_16_USCALED = 9
BUF_FMT_16_SSCALED = 10
BUF_FMT_16_UINT = 11
BUF_FMT_16_SINT = 12
BUF_FMT_16_FLOAT = 13
BUF_FMT_8_8_UNORM = 14
BUF_FMT_8_8_SNORM = 15
BUF_FMT_8_8_USCALED = 16
BUF_FMT_8_8_SSCALED = 17
BUF_FMT_8_8_UINT = 18
BUF_FMT_8_8_SINT = 19
BUF_FMT_32_UINT = 20
BUF_FMT_32_SINT = 21
BUF_FMT_32_FLOAT = 22
BUF_FMT_16_16_UNORM = 23
BUF_FMT_16_16_SNORM = 24
BUF_FMT_16_16_USCALED = 25
BUF_FMT_16_16_SSCALED = 26
BUF_FMT_16_16_UINT = 27
BUF_FMT_16_16_SINT = 28
BUF_FMT_16_16_FLOAT = 29
BUF_FMT_10_11_11_FLOAT = 30
BUF_FMT_11_11_10_FLOAT = 31
BUF_FMT_10_10_10_2_UNORM = 32
BUF_FMT_10_10_10_2_SNORM = 33
BUF_FMT_10_10_10_2_UINT = 34
BUF_FMT_10_10_10_2_SINT = 35
BUF_FMT_2_10_10_10_UNORM = 36
BUF_FMT_2_10_10_10_SNORM = 37
BUF_FMT_2_10_10_10_USCALED = 38
BUF_FMT_2_10_10_10_SSCALED = 39
BUF_FMT_2_10_10_10_UINT = 40
BUF_FMT_2_10_10_10_SINT = 41
BUF_FMT_8_8_8_8_UNORM = 42
BUF_FMT_8_8_8_8_SNORM = 43
BUF_FMT_8_8_8_8_USCALED = 44
BUF_FMT_8_8_8_8_SSCALED = 45
BUF_FMT_8_8_8_8_UINT = 46
BUF_FMT_8_8_8_8_SINT = 47
BUF_FMT_32_32_UINT = 48
BUF_FMT_32_32_SINT = 49
BUF_FMT_32_32_FLOAT = 50
BUF_FMT_16_16_16_16_UNORM = 51
BUF_FMT_16_16_16_16_SNORM = 52
BUF_FMT_16_16_16_16_USCALED = 53
BUF_FMT_16_16_16_16_SSCALED = 54
BUF_FMT_16_16_16_16_UINT = 55
BUF_FMT_16_16_16_16_SINT = 56
BUF_FMT_16_16_16_16_FLOAT = 57
BUF_FMT_32_32_32_UINT = 58
BUF_FMT_32_32_32_SINT = 59
BUF_FMT_32_32_32_FLOAT = 60
BUF_FMT_32_32_32_32_UINT = 61
BUF_FMT_32_32_32_32_SINT = 62
BUF_FMT_32_32_32_32_FLOAT = 63
BUF_FMT_8_SRGB = 64
BUF_FMT_8_8_SRGB = 65
BUF_FMT_8_8_8_8_SRGB = 66
BUF_FMT_5_9_9_9_FLOAT = 67
BUF_FMT_5_6_5_UNORM = 68
BUF_FMT_1_5_5_5_UNORM = 69
BUF_FMT_5_5_5_1_UNORM = 70
BUF_FMT_4_4_4_4_UNORM = 71
BUF_FMT_4_4_UNORM = 72
BUF_FMT_1_UNORM = 73
BUF_FMT_1_REVERSED_UNORM = 74
BUF_FMT_32_FLOAT_CLAMP = 75
BUF_FMT_8_24_UNORM = 76
BUF_FMT_8_24_UINT = 77
BUF_FMT_24_8_UNORM = 78
BUF_FMT_24_8_UINT = 79
BUF_FMT_X24_8_32_UINT = 80
BUF_FMT_X24_8_32_FLOAT = 81
BUF_FMT_GB_GR_UNORM = 82
BUF_FMT_GB_GR_SNORM = 83
BUF_FMT_GB_GR_UINT = 84
BUF_FMT_GB_GR_SRGB = 85
BUF_FMT_BG_RG_UNORM = 86
BUF_FMT_BG_RG_SNORM = 87
BUF_FMT_BG_RG_UINT = 88
BUF_FMT_BG_RG_SRGB = 89
BUF_FMT_BC1_UNORM = 109
BUF_FMT_BC1_SRGB = 110
BUF_FMT_BC2_UNORM = 111
BUF_FMT_BC2_SRGB = 112
class DSOp(IntEnum):
DS_ADD_U32 = 0
@@ -1347,7 +1413,6 @@ class VOP3POp(IntEnum):
V_SWMMAC_F32_16X16X32_BF8_BF8 = 90
class VOP3SDOp(IntEnum):
DWORD = 1
V_ADD_CO_CI_U32 = 288
V_SUB_CO_CI_U32 = 289
V_SUBREV_CO_CI_U32 = 290
@@ -1627,52 +1692,3 @@ class VSCRATCHOp(IntEnum):
SCRATCH_STORE_D16_HI_B16 = 37
SCRATCH_LOAD_BLOCK = 83
SCRATCH_STORE_BLOCK = 84
class BufFmt(IntEnum):
BUF_FMT_8_UNORM = 1
BUF_FMT_8_SNORM = 2
BUF_FMT_8_USCALED = 3
BUF_FMT_8_SSCALED = 4
BUF_FMT_8_UINT = 5
BUF_FMT_8_SINT = 6
BUF_FMT_16_UNORM = 7
BUF_FMT_16_SNORM = 8
BUF_FMT_16_USCALED = 9
BUF_FMT_16_SSCALED = 10
BUF_FMT_16_UINT = 11
BUF_FMT_16_SINT = 12
BUF_FMT_16_FLOAT = 13
BUF_FMT_8_8_UNORM = 14
BUF_FMT_8_8_SNORM = 15
BUF_FMT_8_8_USCALED = 16
BUF_FMT_8_8_SSCALED = 17
BUF_FMT_8_8_UINT = 18
BUF_FMT_8_8_SINT = 19
BUF_FMT_32_UINT = 20
BUF_FMT_32_SINT = 21
BUF_FMT_32_FLOAT = 22
BUF_FMT_16_16_UNORM = 23
BUF_FMT_10_10_10_2_UNORM = 32
BUF_FMT_10_10_10_2_SNORM = 33
BUF_FMT_10_10_10_2_UINT = 34
BUF_FMT_10_10_10_2_SINT = 35
BUF_FMT_2_10_10_10_UNORM = 36
BUF_FMT_2_10_10_10_SNORM = 37
BUF_FMT_2_10_10_10_USCALED = 38
BUF_FMT_2_10_10_10_SSCALED = 39
BUF_FMT_2_10_10_10_UINT = 40
BUF_FMT_2_10_10_10_SINT = 41
BUF_FMT_8_8_8_8_UNORM = 42
BUF_FMT_8_8_8_8_SNORM = 43
BUF_FMT_8_8_8_8_USCALED = 44
BUF_FMT_8_8_8_8_SSCALED = 45
BUF_FMT_8_8_8_8_UINT = 46
BUF_FMT_8_8_8_8_SINT = 47
BUF_FMT_32_32_UINT = 48
BUF_FMT_32_32_SINT = 49
BUF_FMT_32_32_FLOAT = 50
BUF_FMT_16_16_16_16_UNORM = 51
BUF_FMT_16_16_16_16_SNORM = 52
BUF_FMT_16_16_16_16_USCALED = 53
BUF_FMT_16_16_16_16_SSCALED = 54
BUF_FMT_16_16_16_16_UINT = 55

View File

@@ -1,12 +1,11 @@
# autogenerated from AMD RDNA4 ISA PDF by pdf.py - do not edit
# autogenerated from AMD ISA PDF by pdf.py - do not edit
# ruff: noqa: F401,F403
from typing import Annotated
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, Inst96, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
from extra.assembly.amd.dsl import *
from extra.assembly.amd.autogen.rdna4.enum import *
import functools
# instruction formats
class DPP16(Inst64):
class DPP16(Inst):
src0:Src = bits[39:32]
dpp_ctrl = bits[48:40]
fi = bits[50]
@@ -18,7 +17,7 @@ class DPP16(Inst64):
bank_mask = bits[59:56]
row_mask = bits[63:60]
class DPP8(Inst64):
class DPP8(Inst):
src0:Src = bits[39:32]
lane_sel0 = bits[42:40]
lane_sel1 = bits[45:43]
@@ -29,7 +28,17 @@ class DPP8(Inst64):
lane_sel6 = bits[60:58]
lane_sel7 = bits[63:61]
class SMEM(Inst64):
class DS(Inst):
encoding = bits[31:26] == 0b110110
op:Annotated[BitField, DSOp] = bits[25:18]
vdst:VGPRField = bits[63:56]
addr:VGPRField = bits[39:32]
data0:VGPRField = bits[47:40]
data1:VGPRField = bits[55:48]
offset0 = bits[7:0]
offset1 = bits[15:8]
class SMEM(Inst):
encoding = bits[31:26] == 0b111101
op:Annotated[BitField, SMEMOp] = bits[18:13]
sdata:SGPRField = bits[12:6]
@@ -39,153 +48,116 @@ class SMEM(Inst64):
th = bits[24:23]
ioffset = bits[55:32]
class SOP1(Inst32):
class SOP1(Inst):
encoding = bits[31:23] == 0b101111101
op:Annotated[BitField, SOP1Op] = bits[15:8]
sdst:SGPRField = bits[22:16]
ssrc0:SSrc = bits[7:0]
class SOP2(Inst32):
class SOP2(Inst):
encoding = bits[31:30] == 0b10
op:Annotated[BitField, SOP2Op] = bits[29:23]
sdst:SGPRField = bits[22:16]
ssrc0:SSrc = bits[7:0]
ssrc1:SSrc = bits[15:8]
class SOPC(Inst32):
class SOPC(Inst):
encoding = bits[31:23] == 0b101111110
op:Annotated[BitField, SOPCOp] = bits[22:16]
ssrc0:SSrc = bits[7:0]
ssrc1:SSrc = bits[15:8]
class SOPK(Inst32):
class SOPK(Inst):
encoding = bits[31:28] == 0b1011
op:Annotated[BitField, SOPKOp] = bits[27:23]
sdst:SGPRField = bits[22:16]
simm16:SImm = bits[15:0]
class SOPP(Inst32):
class SOPP(Inst):
encoding = bits[31:23] == 0b101111111
op:Annotated[BitField, SOPPOp] = bits[22:16]
simm16:SImm = bits[15:0]
class VBUFFER(Inst96):
class VBUFFER(Inst):
encoding = bits[31:26] == 0b110001
soffset:SSrc = bits[6:0]
op:Annotated[BitField, VBUFFEROp] = bits[21:14]
tfe = bits[22]
vdata:VGPRField = bits[39:32]
rsrc = bits[49:41]
scope = bits[51:50]
th = bits[54:52]
vaddr:VGPRField = bits[71:64]
soffset:SSrc = bits[6:0]
format = bits[61:55]
offen = bits[62]
idxen = bits[63]
vaddr:VGPRField = bits[71:64]
tfe = bits[22]
rsrc = bits[49:41]
scope = bits[51:50]
th = bits[54:52]
ioffset = bits[95:72]
class VDS(Inst64):
encoding = bits[31:26] == 0b110110
offset0 = bits[7:0]
offset1 = bits[15:8]
op = bits[25:18]
addr:VGPRField = bits[39:32]
data0:VGPRField = bits[47:40]
data1:VGPRField = bits[55:48]
vdst:VGPRField = bits[63:56]
class VDSDIR(Inst64):
encoding = bits[31:24] == 0b11001101
class VDSDIR(Inst):
encoding = bits[31:24] == 0b11001110
op:Annotated[BitField, VDSDIROp] = bits[21:20]
vdst:VGPRField = bits[7:0]
waitexp = bits[10:8]
opsel = bits[14:11]
cm = bits[15]
op:Annotated[BitField, VDSDIROp] = bits[20:16]
src0:Src = bits[40:32]
src1:Src = bits[49:41]
src2:Src = bits[58:50]
neg = bits[63:61]
attr = bits[15:10]
attr_chan = bits[9:8]
wait_va = bits[19:16]
wait_vmvsrc = bits[23]
class VEXPORT(Inst64):
class VEXPORT(Inst):
encoding = bits[31:26] == 0b111110
vsrc0:VGPRField = bits[39:32]
vsrc1:VGPRField = bits[47:40]
vsrc2:VGPRField = bits[55:48]
vsrc3:VGPRField = bits[63:56]
en = bits[3:0]
target = bits[9:4]
done = bits[11]
row = bits[13]
vsrc0 = bits[39:32]
vsrc1:VGPRField = bits[47:40]
vsrc2 = bits[55:48]
vsrc3 = bits[63:56]
class VFLAT(Inst96):
encoding = bits[31:24] == 0b11101100
saddr:SSrc = bits[6:0]
op:Annotated[BitField, VFLATOp] = bits[20:14]
vdst:VGPRField = bits[39:32]
sve = bits[49]
scope = bits[51:50]
th = bits[54:52]
vsrc = bits[62:55]
vaddr:VGPRField = bits[71:64]
ioffset = bits[95:72]
class VGLOBAL(Inst96):
encoding = bits[31:24] == 0b11101110
saddr:SSrc = bits[6:0]
op:Annotated[BitField, VGLOBALOp] = bits[20:14]
vdst:VGPRField = bits[39:32]
sve = bits[49]
scope = bits[51:50]
th = bits[54:52]
vsrc = bits[62:55]
vaddr:VGPRField = bits[71:64]
ioffset = bits[95:72]
class VIMAGE(Inst96):
class VIMAGE(Inst):
encoding = bits[31:26] == 0b110100
op:Annotated[BitField, VIMAGEOp] = bits[21:14]
vdata:VGPRField = bits[39:32]
dmask = bits[25:22]
dim = bits[2:0]
tfe = bits[55]
r128 = bits[4]
d16 = bits[5]
a16 = bits[6]
op:Annotated[BitField, VIMAGEOp] = bits[21:14]
dmask = bits[25:22]
vdata:VGPRField = bits[39:32]
rsrc = bits[49:41]
scope = bits[51:50]
th = bits[54:52]
tfe = bits[55]
vaddr4 = bits[56:63]
vaddr0 = bits[71:64]
vaddr1 = bits[79:72]
vaddr2 = bits[87:80]
vaddr3 = bits[95:88]
class VINTERP(Inst64):
class VINTERP(Inst):
encoding = bits[31:24] == 0b11001101
op:Annotated[BitField, VINTERPOp] = bits[20:16]
vdst:VGPRField = bits[7:0]
src0:Src = bits[40:32]
src1:Src = bits[49:41]
src2:Src = bits[58:50]
waitexp = bits[10:8]
opsel = bits[14:11]
neg = bits[63:61]
opsel = bits[14:11]
waitexp = bits[10:8]
cm = bits[15]
class VOP1(Inst32):
encoding = bits[31:25] == 0b111111
class VOP1(Inst):
encoding = bits[31:25] == 0b0111111
op:Annotated[BitField, VOP1Op] = bits[15:9]
vdst:VGPRField = bits[24:17]
src0:Src = bits[8:0]
class VOP2(Inst32):
encoding = bits[31] == 0
class VOP2(Inst):
encoding = bits[31] == 0b0
op:Annotated[BitField, VOP2Op] = bits[30:25]
vdst:VGPRField = bits[24:17]
src0:Src = bits[8:0]
vsrc1:VGPRField = bits[16:9]
class VOP3(Inst64):
class VOP3(Inst):
encoding = bits[31:26] == 0b110101
op:Annotated[BitField, VOP3Op] = bits[25:16]
vdst:VGPRField = bits[7:0]
@@ -198,9 +170,8 @@ class VOP3(Inst64):
opsel = bits[14:11]
cm = bits[15]
class VOP3P(Inst64):
class VOP3P(Inst):
encoding = bits[31:24] == 0b11001100
_defaults = {'opsel_hi': 3, 'opsel_hi2': 1}
op:Annotated[BitField, VOP3POp] = bits[22:16]
vdst:VGPRField = bits[7:0]
src0:Src = bits[40:32]
@@ -213,7 +184,7 @@ class VOP3P(Inst64):
opsel_hi2 = bits[14]
cm = bits[15]
class VOP3SD(Inst64):
class VOP3SD(Inst):
encoding = bits[31:26] == 0b110101
op:Annotated[BitField, VOP3SDOp] = bits[25:16]
vdst:VGPRField = bits[7:0]
@@ -221,38 +192,38 @@ class VOP3SD(Inst64):
src0:Src = bits[40:32]
src1:Src = bits[49:41]
src2:Src = bits[58:50]
cm = bits[15]
omod = bits[60:59]
neg = bits[63:61]
cm = bits[15]
class VOPC(Inst32):
encoding = bits[31:25] == 0b111110
class VOPC(Inst):
encoding = bits[31:25] == 0b0111110
op:Annotated[BitField, VOPCOp] = bits[24:17]
src0:Src = bits[8:0]
vsrc1:VGPRField = bits[16:9]
class VOPD(Inst64):
class VOPD(Inst):
encoding = bits[31:26] == 0b110010
opx:Annotated[BitField, VOPDOp] = bits[25:22]
opy:Annotated[BitField, VOPDOp] = bits[21:17]
vdstx:VGPRField = bits[63:56]
vdstx = bits[63:56]
vdsty:VDSTYEnc = bits[55:49]
srcx0:Src = bits[8:0]
vsrcx1:VGPRField = bits[16:9]
srcy0:Src = bits[40:32]
vsrcy1:VGPRField = bits[48:41]
vsrcx1 = bits[16:9]
vsrcy1 = bits[48:41]
class VSAMPLE(Inst96):
class VSAMPLE(Inst):
encoding = bits[31:26] == 0b111001
op:Annotated[BitField, VSAMPLEOp] = bits[21:14]
vdata:VGPRField = bits[39:32]
dmask = bits[25:22]
dim = bits[2:0]
tfe = bits[3]
unrm = bits[13]
r128 = bits[4]
d16 = bits[5]
a16 = bits[6]
unrm = bits[13]
op:Annotated[BitField, VSAMPLEOp] = bits[21:14]
dmask = bits[25:22]
vdata:VGPRField = bits[39:32]
lwe = bits[40]
rsrc = bits[49:41]
scope = bits[51:50]
@@ -263,19 +234,130 @@ class VSAMPLE(Inst96):
vaddr2 = bits[87:80]
vaddr3 = bits[95:88]
class VSCRATCH(Inst96):
encoding = bits[31:24] == 0b11101101
saddr:SSrc = bits[6:0]
op:Annotated[BitField, VSCRATCHOp] = bits[20:14]
vdst:VGPRField = bits[39:32]
sve = bits[49]
scope = bits[51:50]
th = bits[54:52]
vsrc = bits[62:55]
vaddr:VGPRField = bits[71:64]
ioffset = bits[95:72]
# instruction helpers
ds_add_u32 = functools.partial(DS, DSOp.DS_ADD_U32)
ds_sub_u32 = functools.partial(DS, DSOp.DS_SUB_U32)
ds_rsub_u32 = functools.partial(DS, DSOp.DS_RSUB_U32)
ds_inc_u32 = functools.partial(DS, DSOp.DS_INC_U32)
ds_dec_u32 = functools.partial(DS, DSOp.DS_DEC_U32)
ds_min_i32 = functools.partial(DS, DSOp.DS_MIN_I32)
ds_max_i32 = functools.partial(DS, DSOp.DS_MAX_I32)
ds_min_u32 = functools.partial(DS, DSOp.DS_MIN_U32)
ds_max_u32 = functools.partial(DS, DSOp.DS_MAX_U32)
ds_and_b32 = functools.partial(DS, DSOp.DS_AND_B32)
ds_or_b32 = functools.partial(DS, DSOp.DS_OR_B32)
ds_xor_b32 = functools.partial(DS, DSOp.DS_XOR_B32)
ds_mskor_b32 = functools.partial(DS, DSOp.DS_MSKOR_B32)
ds_store_b32 = functools.partial(DS, DSOp.DS_STORE_B32)
ds_store_2addr_b32 = functools.partial(DS, DSOp.DS_STORE_2ADDR_B32)
ds_store_2addr_stride64_b32 = functools.partial(DS, DSOp.DS_STORE_2ADDR_STRIDE64_B32)
ds_cmpstore_b32 = functools.partial(DS, DSOp.DS_CMPSTORE_B32)
ds_min_num_f32 = functools.partial(DS, DSOp.DS_MIN_NUM_F32)
ds_max_num_f32 = functools.partial(DS, DSOp.DS_MAX_NUM_F32)
ds_nop = functools.partial(DS, DSOp.DS_NOP)
ds_add_f32 = functools.partial(DS, DSOp.DS_ADD_F32)
ds_store_b8 = functools.partial(DS, DSOp.DS_STORE_B8)
ds_store_b16 = functools.partial(DS, DSOp.DS_STORE_B16)
ds_add_rtn_u32 = functools.partial(DS, DSOp.DS_ADD_RTN_U32)
ds_sub_rtn_u32 = functools.partial(DS, DSOp.DS_SUB_RTN_U32)
ds_rsub_rtn_u32 = functools.partial(DS, DSOp.DS_RSUB_RTN_U32)
ds_inc_rtn_u32 = functools.partial(DS, DSOp.DS_INC_RTN_U32)
ds_dec_rtn_u32 = functools.partial(DS, DSOp.DS_DEC_RTN_U32)
ds_min_rtn_i32 = functools.partial(DS, DSOp.DS_MIN_RTN_I32)
ds_max_rtn_i32 = functools.partial(DS, DSOp.DS_MAX_RTN_I32)
ds_min_rtn_u32 = functools.partial(DS, DSOp.DS_MIN_RTN_U32)
ds_max_rtn_u32 = functools.partial(DS, DSOp.DS_MAX_RTN_U32)
ds_and_rtn_b32 = functools.partial(DS, DSOp.DS_AND_RTN_B32)
ds_or_rtn_b32 = functools.partial(DS, DSOp.DS_OR_RTN_B32)
ds_xor_rtn_b32 = functools.partial(DS, DSOp.DS_XOR_RTN_B32)
ds_mskor_rtn_b32 = functools.partial(DS, DSOp.DS_MSKOR_RTN_B32)
ds_storexchg_rtn_b32 = functools.partial(DS, DSOp.DS_STOREXCHG_RTN_B32)
ds_storexchg_2addr_rtn_b32 = functools.partial(DS, DSOp.DS_STOREXCHG_2ADDR_RTN_B32)
ds_storexchg_2addr_stride64_rtn_b32 = functools.partial(DS, DSOp.DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32)
ds_cmpstore_rtn_b32 = functools.partial(DS, DSOp.DS_CMPSTORE_RTN_B32)
ds_min_num_rtn_f32 = functools.partial(DS, DSOp.DS_MIN_NUM_RTN_F32)
ds_max_num_rtn_f32 = functools.partial(DS, DSOp.DS_MAX_NUM_RTN_F32)
ds_swizzle_b32 = functools.partial(DS, DSOp.DS_SWIZZLE_B32)
ds_load_b32 = functools.partial(DS, DSOp.DS_LOAD_B32)
ds_load_2addr_b32 = functools.partial(DS, DSOp.DS_LOAD_2ADDR_B32)
ds_load_2addr_stride64_b32 = functools.partial(DS, DSOp.DS_LOAD_2ADDR_STRIDE64_B32)
ds_load_i8 = functools.partial(DS, DSOp.DS_LOAD_I8)
ds_load_u8 = functools.partial(DS, DSOp.DS_LOAD_U8)
ds_load_i16 = functools.partial(DS, DSOp.DS_LOAD_I16)
ds_load_u16 = functools.partial(DS, DSOp.DS_LOAD_U16)
ds_consume = functools.partial(DS, DSOp.DS_CONSUME)
ds_append = functools.partial(DS, DSOp.DS_APPEND)
ds_add_u64 = functools.partial(DS, DSOp.DS_ADD_U64)
ds_sub_u64 = functools.partial(DS, DSOp.DS_SUB_U64)
ds_rsub_u64 = functools.partial(DS, DSOp.DS_RSUB_U64)
ds_inc_u64 = functools.partial(DS, DSOp.DS_INC_U64)
ds_dec_u64 = functools.partial(DS, DSOp.DS_DEC_U64)
ds_min_i64 = functools.partial(DS, DSOp.DS_MIN_I64)
ds_max_i64 = functools.partial(DS, DSOp.DS_MAX_I64)
ds_min_u64 = functools.partial(DS, DSOp.DS_MIN_U64)
ds_max_u64 = functools.partial(DS, DSOp.DS_MAX_U64)
ds_and_b64 = functools.partial(DS, DSOp.DS_AND_B64)
ds_or_b64 = functools.partial(DS, DSOp.DS_OR_B64)
ds_xor_b64 = functools.partial(DS, DSOp.DS_XOR_B64)
ds_mskor_b64 = functools.partial(DS, DSOp.DS_MSKOR_B64)
ds_store_b64 = functools.partial(DS, DSOp.DS_STORE_B64)
ds_store_2addr_b64 = functools.partial(DS, DSOp.DS_STORE_2ADDR_B64)
ds_store_2addr_stride64_b64 = functools.partial(DS, DSOp.DS_STORE_2ADDR_STRIDE64_B64)
ds_cmpstore_b64 = functools.partial(DS, DSOp.DS_CMPSTORE_B64)
ds_min_num_f64 = functools.partial(DS, DSOp.DS_MIN_NUM_F64)
ds_max_num_f64 = functools.partial(DS, DSOp.DS_MAX_NUM_F64)
ds_add_rtn_u64 = functools.partial(DS, DSOp.DS_ADD_RTN_U64)
ds_sub_rtn_u64 = functools.partial(DS, DSOp.DS_SUB_RTN_U64)
ds_rsub_rtn_u64 = functools.partial(DS, DSOp.DS_RSUB_RTN_U64)
ds_inc_rtn_u64 = functools.partial(DS, DSOp.DS_INC_RTN_U64)
ds_dec_rtn_u64 = functools.partial(DS, DSOp.DS_DEC_RTN_U64)
ds_min_rtn_i64 = functools.partial(DS, DSOp.DS_MIN_RTN_I64)
ds_max_rtn_i64 = functools.partial(DS, DSOp.DS_MAX_RTN_I64)
ds_min_rtn_u64 = functools.partial(DS, DSOp.DS_MIN_RTN_U64)
ds_max_rtn_u64 = functools.partial(DS, DSOp.DS_MAX_RTN_U64)
ds_and_rtn_b64 = functools.partial(DS, DSOp.DS_AND_RTN_B64)
ds_or_rtn_b64 = functools.partial(DS, DSOp.DS_OR_RTN_B64)
ds_xor_rtn_b64 = functools.partial(DS, DSOp.DS_XOR_RTN_B64)
ds_mskor_rtn_b64 = functools.partial(DS, DSOp.DS_MSKOR_RTN_B64)
ds_storexchg_rtn_b64 = functools.partial(DS, DSOp.DS_STOREXCHG_RTN_B64)
ds_storexchg_2addr_rtn_b64 = functools.partial(DS, DSOp.DS_STOREXCHG_2ADDR_RTN_B64)
ds_storexchg_2addr_stride64_rtn_b64 = functools.partial(DS, DSOp.DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64)
ds_cmpstore_rtn_b64 = functools.partial(DS, DSOp.DS_CMPSTORE_RTN_B64)
ds_min_num_rtn_f64 = functools.partial(DS, DSOp.DS_MIN_NUM_RTN_F64)
ds_max_num_rtn_f64 = functools.partial(DS, DSOp.DS_MAX_NUM_RTN_F64)
ds_load_b64 = functools.partial(DS, DSOp.DS_LOAD_B64)
ds_load_2addr_b64 = functools.partial(DS, DSOp.DS_LOAD_2ADDR_B64)
ds_load_2addr_stride64_b64 = functools.partial(DS, DSOp.DS_LOAD_2ADDR_STRIDE64_B64)
ds_add_rtn_f32 = functools.partial(DS, DSOp.DS_ADD_RTN_F32)
ds_condxchg32_rtn_b64 = functools.partial(DS, DSOp.DS_CONDXCHG32_RTN_B64)
ds_cond_sub_u32 = functools.partial(DS, DSOp.DS_COND_SUB_U32)
ds_sub_clamp_u32 = functools.partial(DS, DSOp.DS_SUB_CLAMP_U32)
ds_pk_add_f16 = functools.partial(DS, DSOp.DS_PK_ADD_F16)
ds_pk_add_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_BF16)
ds_store_b8_d16_hi = functools.partial(DS, DSOp.DS_STORE_B8_D16_HI)
ds_store_b16_d16_hi = functools.partial(DS, DSOp.DS_STORE_B16_D16_HI)
ds_load_u8_d16 = functools.partial(DS, DSOp.DS_LOAD_U8_D16)
ds_load_u8_d16_hi = functools.partial(DS, DSOp.DS_LOAD_U8_D16_HI)
ds_load_i8_d16 = functools.partial(DS, DSOp.DS_LOAD_I8_D16)
ds_load_i8_d16_hi = functools.partial(DS, DSOp.DS_LOAD_I8_D16_HI)
ds_load_u16_d16 = functools.partial(DS, DSOp.DS_LOAD_U16_D16)
ds_load_u16_d16_hi = functools.partial(DS, DSOp.DS_LOAD_U16_D16_HI)
ds_cond_sub_rtn_u32 = functools.partial(DS, DSOp.DS_COND_SUB_RTN_U32)
ds_sub_clamp_rtn_u32 = functools.partial(DS, DSOp.DS_SUB_CLAMP_RTN_U32)
ds_pk_add_rtn_f16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_F16)
ds_pk_add_rtn_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_BF16)
ds_store_addtid_b32 = functools.partial(DS, DSOp.DS_STORE_ADDTID_B32)
ds_load_addtid_b32 = functools.partial(DS, DSOp.DS_LOAD_ADDTID_B32)
ds_permute_b32 = functools.partial(DS, DSOp.DS_PERMUTE_B32)
ds_bpermute_b32 = functools.partial(DS, DSOp.DS_BPERMUTE_B32)
ds_bpermute_fi_b32 = functools.partial(DS, DSOp.DS_BPERMUTE_FI_B32)
ds_store_b96 = functools.partial(DS, DSOp.DS_STORE_B96)
ds_store_b128 = functools.partial(DS, DSOp.DS_STORE_B128)
ds_bvh_stack_push4_pop1_rtn_b32 = functools.partial(DS, DSOp.DS_BVH_STACK_PUSH4_POP1_RTN_B32)
ds_bvh_stack_push8_pop1_rtn_b32 = functools.partial(DS, DSOp.DS_BVH_STACK_PUSH8_POP1_RTN_B32)
ds_bvh_stack_push8_pop2_rtn_b64 = functools.partial(DS, DSOp.DS_BVH_STACK_PUSH8_POP2_RTN_B64)
ds_load_b96 = functools.partial(DS, DSOp.DS_LOAD_B96)
ds_load_b128 = functools.partial(DS, DSOp.DS_LOAD_B128)
s_load_b32 = functools.partial(SMEM, SMEMOp.S_LOAD_B32)
s_load_b64 = functools.partial(SMEM, SMEMOp.S_LOAD_B64)
s_load_b128 = functools.partial(SMEM, SMEMOp.S_LOAD_B128)
@@ -647,126 +729,6 @@ tbuffer_store_d16_format_xyz = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STOR
tbuffer_store_d16_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_D16_FORMAT_XYZW)
ds_param_load = functools.partial(VDSDIR, VDSDIROp.DS_PARAM_LOAD)
ds_direct_load = functools.partial(VDSDIR, VDSDIROp.DS_DIRECT_LOAD)
flat_load_u8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_U8)
flat_load_i8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_I8)
flat_load_u16 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_U16)
flat_load_i16 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_I16)
flat_load_b32 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_B32)
flat_load_b64 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_B64)
flat_load_b96 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_B96)
flat_load_b128 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_B128)
flat_store_b8 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B8)
flat_store_b16 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B16)
flat_store_b32 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B32)
flat_store_b64 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B64)
flat_store_b96 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B96)
flat_store_b128 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B128)
flat_load_d16_u8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_U8)
flat_load_d16_i8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_I8)
flat_load_d16_b16 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_B16)
flat_load_d16_hi_u8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_HI_U8)
flat_load_d16_hi_i8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_HI_I8)
flat_load_d16_hi_b16 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_HI_B16)
flat_store_d16_hi_b8 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_D16_HI_B8)
flat_store_d16_hi_b16 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_D16_HI_B16)
flat_atomic_swap_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SWAP_B32)
flat_atomic_cmpswap_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_CMPSWAP_B32)
flat_atomic_add_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_ADD_U32)
flat_atomic_sub_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SUB_U32)
flat_atomic_sub_clamp_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SUB_CLAMP_U32)
flat_atomic_min_i32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_I32)
flat_atomic_min_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_U32)
flat_atomic_max_i32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_I32)
flat_atomic_max_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_U32)
flat_atomic_and_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_AND_B32)
flat_atomic_or_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_OR_B32)
flat_atomic_xor_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_XOR_B32)
flat_atomic_inc_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_INC_U32)
flat_atomic_dec_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_DEC_U32)
flat_atomic_swap_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SWAP_B64)
flat_atomic_cmpswap_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_CMPSWAP_B64)
flat_atomic_add_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_ADD_U64)
flat_atomic_sub_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SUB_U64)
flat_atomic_min_i64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_I64)
flat_atomic_min_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_U64)
flat_atomic_max_i64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_I64)
flat_atomic_max_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_U64)
flat_atomic_and_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_AND_B64)
flat_atomic_or_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_OR_B64)
flat_atomic_xor_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_XOR_B64)
flat_atomic_inc_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_INC_U64)
flat_atomic_dec_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_DEC_U64)
flat_atomic_cond_sub_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_COND_SUB_U32)
flat_atomic_min_num_f32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_NUM_F32)
flat_atomic_max_num_f32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_NUM_F32)
flat_atomic_add_f32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_ADD_F32)
flat_atomic_pk_add_f16 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_PK_ADD_F16)
flat_atomic_pk_add_bf16 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_PK_ADD_BF16)
global_load_u8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_U8)
global_load_i8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_I8)
global_load_u16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_U16)
global_load_i16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_I16)
global_load_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_B32)
global_load_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_B64)
global_load_b96 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_B96)
global_load_b128 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_B128)
global_store_b8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B8)
global_store_b16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B16)
global_store_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B32)
global_store_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B64)
global_store_b96 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B96)
global_store_b128 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B128)
global_load_d16_u8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_U8)
global_load_d16_i8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_I8)
global_load_d16_b16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_B16)
global_load_d16_hi_u8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_HI_U8)
global_load_d16_hi_i8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_HI_I8)
global_load_d16_hi_b16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_HI_B16)
global_store_d16_hi_b8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_D16_HI_B8)
global_store_d16_hi_b16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_D16_HI_B16)
global_load_addtid_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_ADDTID_B32)
global_store_addtid_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_ADDTID_B32)
global_inv = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_INV)
global_wb = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_WB)
global_atomic_swap_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SWAP_B32)
global_atomic_cmpswap_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_CMPSWAP_B32)
global_atomic_add_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_ADD_U32)
global_atomic_sub_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SUB_U32)
global_atomic_sub_clamp_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SUB_CLAMP_U32)
global_atomic_min_i32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_I32)
global_atomic_min_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_U32)
global_atomic_max_i32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_I32)
global_atomic_max_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_U32)
global_atomic_and_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_AND_B32)
global_atomic_or_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_OR_B32)
global_atomic_xor_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_XOR_B32)
global_atomic_inc_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_INC_U32)
global_atomic_dec_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_DEC_U32)
global_atomic_swap_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SWAP_B64)
global_atomic_cmpswap_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_CMPSWAP_B64)
global_atomic_add_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_ADD_U64)
global_atomic_sub_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SUB_U64)
global_atomic_min_i64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_I64)
global_atomic_min_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_U64)
global_atomic_max_i64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_I64)
global_atomic_max_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_U64)
global_atomic_and_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_AND_B64)
global_atomic_or_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_OR_B64)
global_atomic_xor_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_XOR_B64)
global_atomic_inc_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_INC_U64)
global_atomic_dec_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_DEC_U64)
global_wbinv = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_WBINV)
global_atomic_cond_sub_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_COND_SUB_U32)
global_atomic_min_num_f32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_NUM_F32)
global_atomic_max_num_f32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_NUM_F32)
global_load_block = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_BLOCK)
global_store_block = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_BLOCK)
global_atomic_add_f32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_ADD_F32)
global_load_tr_b128 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_TR_B128)
global_load_tr_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_TR_B64)
global_atomic_pk_add_f16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_PK_ADD_F16)
global_atomic_pk_add_bf16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_PK_ADD_BF16)
global_atomic_ordered_add_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_ORDERED_ADD_B64)
image_load = functools.partial(VIMAGE, VIMAGEOp.IMAGE_LOAD)
image_load_mip = functools.partial(VIMAGE, VIMAGEOp.IMAGE_LOAD_MIP)
image_load_pck = functools.partial(VIMAGE, VIMAGEOp.IMAGE_LOAD_PCK)
@@ -931,8 +893,8 @@ v_add_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_NC_U32)
v_sub_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_NC_U32)
v_subrev_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_NC_U32)
v_fmac_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F32)
def v_fmamk_f32_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F32, vdst, src0, vsrc1, literal=K)
def v_fmaak_f32_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F32, vdst, src0, vsrc1, literal=K)
v_fmamk_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F32)
v_fmaak_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F32)
v_cvt_pk_rtz_f16_f32_e32 = functools.partial(VOP2, VOP2Op.V_CVT_PK_RTZ_F16_F32)
v_min_num_f16_e32 = functools.partial(VOP2, VOP2Op.V_MIN_NUM_F16)
v_max_num_f16_e32 = functools.partial(VOP2, VOP2Op.V_MAX_NUM_F16)
@@ -941,8 +903,8 @@ v_sub_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUB_F16)
v_subrev_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_F16)
v_mul_f16_e32 = functools.partial(VOP2, VOP2Op.V_MUL_F16)
v_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F16)
def v_fmamk_f16_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F16, vdst, src0, vsrc1, literal=K)
def v_fmaak_f16_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F16, vdst, src0, vsrc1, literal=K)
v_fmamk_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F16)
v_fmaak_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F16)
v_ldexp_f16_e32 = functools.partial(VOP2, VOP2Op.V_LDEXP_F16)
v_pk_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_PK_FMAC_F16)
v_cmp_lt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_F16)
@@ -1435,7 +1397,6 @@ v_swmmac_f32_16x16x32_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16
v_swmmac_f32_16x16x32_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_FP8_BF8)
v_swmmac_f32_16x16x32_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_BF8_FP8)
v_swmmac_f32_16x16x32_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_BF8_BF8)
dword = functools.partial(VOP3SD, VOP3SDOp.DWORD)
v_add_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_ADD_CO_CI_U32)
v_sub_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUB_CO_CI_U32)
v_subrev_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUBREV_CO_CI_U32)
@@ -1682,55 +1643,4 @@ image_gather4_c_cl = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4_C_CL)
image_gather4_c_l = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4_C_L)
image_gather4_c_b = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4_C_B)
image_gather4_c_b_cl = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4_C_B_CL)
image_gather4h = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4H)
scratch_load_u8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_U8)
scratch_load_i8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_I8)
scratch_load_u16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_U16)
scratch_load_i16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_I16)
scratch_load_b32 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_B32)
scratch_load_b64 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_B64)
scratch_load_b96 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_B96)
scratch_load_b128 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_B128)
scratch_store_b8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B8)
scratch_store_b16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B16)
scratch_store_b32 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B32)
scratch_store_b64 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B64)
scratch_store_b96 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B96)
scratch_store_b128 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B128)
scratch_load_d16_u8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_U8)
scratch_load_d16_i8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_I8)
scratch_load_d16_b16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_B16)
scratch_load_d16_hi_u8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_HI_U8)
scratch_load_d16_hi_i8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_HI_I8)
scratch_load_d16_hi_b16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_HI_B16)
scratch_store_d16_hi_b8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_D16_HI_B8)
scratch_store_d16_hi_b16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_D16_HI_B16)
scratch_load_block = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_BLOCK)
scratch_store_block = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_BLOCK)
VCC_LO = SrcEnum.VCC_LO
VCC_HI = SrcEnum.VCC_HI
NULL = SrcEnum.NULL
M0 = SrcEnum.M0
EXEC_LO = SrcEnum.EXEC_LO
EXEC_HI = SrcEnum.EXEC_HI
ZERO = SrcEnum.ZERO
DPP8FI = SrcEnum.DPP8FI
SHARED_BASE = SrcEnum.SHARED_BASE
SHARED_LIMIT = SrcEnum.SHARED_LIMIT
PRIVATE_BASE = SrcEnum.PRIVATE_BASE
PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
POS_HALF = SrcEnum.POS_HALF
NEG_HALF = SrcEnum.NEG_HALF
POS_ONE = SrcEnum.POS_ONE
NEG_ONE = SrcEnum.NEG_ONE
POS_TWO = SrcEnum.POS_TWO
NEG_TWO = SrcEnum.NEG_TWO
POS_FOUR = SrcEnum.POS_FOUR
NEG_FOUR = SrcEnum.NEG_FOUR
INV_2PI = SrcEnum.INV_2PI
VCCZ = SrcEnum.VCCZ
EXECZ = SrcEnum.EXECZ
SCC = SrcEnum.SCC
LDS_DIRECT = SrcEnum.LDS_DIRECT
OFF = NULL
image_gather4h = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4H)

File diff suppressed because one or more lines are too long

View File

@@ -9,6 +9,18 @@ from extra.assembly.amd.autogen.rdna3.enum import (VOP1Op, VOP2Op, VOP3Op, VOP3S
SOPCOp, SOPKOp, SOPPOp, SMEMOp, DSOp, FLATOp, MUBUFOp, MTBUFOp, MIMGOp, VINTERPOp)
from extra.assembly.amd.autogen.cdna.enum import VOP1Op as CDNA_VOP1Op, VOP2Op as CDNA_VOP2Op
# Source operand encoding - constant across all AMD ISAs
class SrcEnum(IntEnum):
  """Named values of the source operand encoding (noted in this file as constant across all AMD ISAs)."""
  VCC_LO = 106
  VCC_HI = 107
  NULL = 124
  M0 = 125
  EXEC_LO = 126
  EXEC_HI = 127
  ZERO = 128
  DPP8 = 233
  DPP8FI = 234
  SHARED_BASE = 235
  SHARED_LIMIT = 236
  PRIVATE_BASE = 237
  PRIVATE_LIMIT = 238
  POS_HALF = 240
  NEG_HALF = 241
  POS_ONE = 242
  NEG_ONE = 243
  POS_TWO = 244
  NEG_TWO = 245
  POS_FOUR = 246
  NEG_FOUR = 247
  INV_2PI = 248
  DPP16 = 250
  VCCZ = 251
  EXECZ = 252
  SCC = 253
  LDS_DIRECT = 254
# Module-level shorthands for SrcEnum members (DPP8 and DPP16 get no shorthand)
VCC_LO = SrcEnum.VCC_LO
VCC_HI = SrcEnum.VCC_HI
NULL = SrcEnum.NULL
M0 = SrcEnum.M0
EXEC_LO = SrcEnum.EXEC_LO
EXEC_HI = SrcEnum.EXEC_HI
ZERO = SrcEnum.ZERO
DPP8FI = SrcEnum.DPP8FI
SHARED_BASE = SrcEnum.SHARED_BASE
SHARED_LIMIT = SrcEnum.SHARED_LIMIT
PRIVATE_BASE = SrcEnum.PRIVATE_BASE
PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
POS_HALF = SrcEnum.POS_HALF
NEG_HALF = SrcEnum.NEG_HALF
POS_ONE = SrcEnum.POS_ONE
NEG_ONE = SrcEnum.NEG_ONE
POS_TWO = SrcEnum.POS_TWO
NEG_TWO = SrcEnum.NEG_TWO
POS_FOUR = SrcEnum.POS_FOUR
NEG_FOUR = SrcEnum.NEG_FOUR
INV_2PI = SrcEnum.INV_2PI
VCCZ = SrcEnum.VCCZ
EXECZ = SrcEnum.EXECZ
SCC = SrcEnum.SCC
LDS_DIRECT = SrcEnum.LDS_DIRECT
OFF = NULL  # OFF is a second name for the NULL operand
# Common masks and bit conversion functions
MASK32, MASK64, MASK128 = 0xffffffff, 0xffffffffffffffff, (1 << 128) - 1
_struct_f, _struct_I = struct.Struct("<f"), struct.Struct("<I")
@@ -288,7 +300,16 @@ class Inst:
def __init_subclass__(cls, **kwargs):
super().__init_subclass__(**kwargs)
cls._fields = {n: v[0] if isinstance(v, tuple) else v for n, v in cls.__dict__.items() if isinstance(v, BitField) or (isinstance(v, tuple) and len(v) == 2 and isinstance(v[0], BitField))}
# Merge fields from parent classes
cls._fields = {}
for base in reversed(cls.__mro__):
if base is Inst or not hasattr(base, '_fields'): continue
cls._fields.update(base._fields)
# Add this class's own fields (overrides parents)
cls._fields.update({n: v[0] if isinstance(v, tuple) else v for n, v in cls.__dict__.items() if isinstance(v, BitField) or (isinstance(v, tuple) and len(v) == 2 and isinstance(v[0], BitField))})
# Compute size from max bit (exclude optional fields starting at bit 64+, e.g. MIMG NSA)
max_bit = max((bf.hi for bf in cls._fields.values() if bf.lo < 64), default=0) if cls._fields else 0
cls._sz = 12 if max_bit > 63 else 8 if max_bit > 31 else 4
if 'encoding' in cls._fields and isinstance(cls.__dict__.get('encoding'), tuple): cls._encoding = cls.__dict__['encoding']
def _or_field(self, name: str, bit: int):
@@ -352,6 +373,16 @@ class Inst:
field_names = [n for n in self._fields if n != 'encoding']
# Map Python-friendly names to actual field names (abs_ -> abs for Python reserved word)
if 'abs_' in kwargs: kwargs['abs'] = kwargs.pop('abs_')
# If more args than fields, treat extra arg as literal (for FMAAK/FMAMK style instructions)
# FMAMK has K in middle (vdst, src0, K, vsrc1), FMAAK has K at end (vdst, src0, vsrc1, K)
args = list(args)
if len(args) > len(field_names) and literal is None:
for i, a in enumerate(args):
if isinstance(a, int) and not isinstance(a, SrcEnum) and i < len(field_names) and field_names[i] in ('vsrc1',):
literal = args.pop(i)
break
else:
literal = args.pop() # fallback: last arg is literal
orig_args = dict(zip(field_names, args)) | kwargs
self._values.update(orig_args)
self._precompute()
@@ -450,7 +481,7 @@ class Inst:
return result + (lit32 & MASK32).to_bytes(4, 'little')
@classmethod
def _size(cls) -> int: return 4 if issubclass(cls, Inst32) else 12 if issubclass(cls, Inst96) else 8
def _size(cls) -> int: return cls._sz
def size(self) -> int:
# Literal is always 4 bytes in the binary (for 64-bit ops, it's in high 32 bits)
return self._size() + (4 if self._literal is not None else 0)
@@ -583,6 +614,4 @@ class Inst:
def is_64bit(self) -> bool: return spec_is_64bit(self.op_name)
def is_dst_16(self) -> bool: return self._spec_regs[0] == 1 and is_dtype_16(self._spec_dtype[0])
class Inst32(Inst): pass
class Inst64(Inst): pass
class Inst96(Inst): pass

View File

@@ -7,8 +7,9 @@ from extra.assembly.amd.dsl import Inst, unwrap, FLOAT_ENC, MASK32, MASK64, _f32
from extra.assembly.amd.asm import detect_format
from extra.assembly.amd.pcode import compile_pseudocode
from extra.assembly.amd.autogen.rdna3.str_pcode import PSEUDOCODE_STRINGS
from extra.assembly.amd.dsl import SrcEnum
from extra.assembly.amd.autogen.rdna3.ins import (SOP1, SOP2, SOPC, SOPK, SOPP, SMEM, VOP1, VOP2, VOP3, VOP3SD, VOP3P, VOPC, DS, FLAT, VOPD,
SrcEnum, SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, SMEMOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp, DSOp, FLATOp, GLOBALOp, SCRATCHOp, VOPDOp)
SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, SMEMOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp, DSOp, FLATOp, GLOBALOp, SCRATCHOp, VOPDOp)
WAVE_SIZE, SGPR_COUNT, VGPR_COUNT = 32, 128, 256
VCC_LO, VCC_HI, NULL, EXEC_LO, EXEC_HI, SCC = SrcEnum.VCC_LO, SrcEnum.VCC_HI, SrcEnum.NULL, SrcEnum.EXEC_LO, SrcEnum.EXEC_HI, SrcEnum.SCC

View File

@@ -447,6 +447,27 @@ TWO_OVER_PI_1201 = Reg(0x0145f306dc9c882a53f84eafa3ea69bb81b6c52b3278872083fca2c
# COMPILER: pseudocode -> Python (minimal transforms)
# ═══════════════════════════════════════════════════════════════════════════════
def _filter_pseudocode(pseudocode: str) -> str:
"""Filter raw PDF pseudocode to only include actual code lines."""
pcode_lines, in_lambda, depth = [], 0, 0
for line in pseudocode.split('\n'):
s = line.strip()
if not s: continue
if '=>' in s or re.match(r'^[A-Z_]+\(', s): continue # Skip example lines
if '= lambda(' in s: in_lambda += 1; continue # Skip lambda definitions
if in_lambda > 0:
if s.endswith(');'): in_lambda -= 1
continue
# Only include lines that look like pseudocode
is_code = (any(p in s for p in ['D0.', 'D1.', 'S0.', 'S1.', 'S2.', 'SCC =', 'SCC ?', 'VCC', 'EXEC', 'tmp =', 'tmp[', 'lane =', 'PC =',
'D0[', 'D1[', 'S0[', 'S1[', 'S2[', 'MEM[', 'RETURN_DATA', 'VADDR', 'VDATA', 'VDST', 'SADDR', 'OFFSET']) or
s.startswith(('if ', 'else', 'elsif', 'endif', 'declare ', 'for ', 'endfor', '//')) or
re.match(r'^[a-z_]+\s*=', s) or re.match(r'^[a-z_]+\[', s) or (depth > 0 and '=' in s))
if s.startswith('if '): depth += 1
elif s.startswith('endif'): depth = max(0, depth - 1)
if is_code: pcode_lines.append(s)
return '\n'.join(pcode_lines)
def _compile_pseudocode(pseudocode: str) -> str:
"""Compile pseudocode to Python. Transforms are minimal - most syntax just works."""
pseudocode = re.sub(r'\bpass\b', 'pass_', pseudocode) # 'pass' is Python keyword
@@ -756,9 +777,10 @@ _PCODE_GLOBALS = {
@functools.cache
def compile_pseudocode(cls_name: str, op_name: str, pseudocode: str):
"""Compile pseudocode string to executable function. Cached for performance."""
code = _compile_pseudocode(pseudocode)
filtered = _filter_pseudocode(pseudocode)
code = _compile_pseudocode(filtered)
code = _apply_pseudocode_fixes(op_name, code)
fn_code = _generate_function(cls_name, op_name, pseudocode, code)
fn_code = _generate_function(cls_name, op_name, filtered, code)
fn_name = f"_{cls_name}_{op_name}"
local_ns = {}
exec(fn_code, _PCODE_GLOBALS, local_ns)

View File

@@ -1,457 +1,305 @@
# Generate AMD ISA autogen files from PDF documentation
# Combines format/enum generation (previously in dsl.py) and pseudocode compilation (previously in pcode.py)
# Usage: python -m extra.assembly.amd.pdf [--arch rdna3|rdna4|cdna|all]
import re, functools
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
# Generic PDF text extractor - no external dependencies
import re, zlib
from tinygrad.helpers import fetch, merge_dicts
PDF_URLS = {
"rdna3": "https://docs.amd.com/api/khub/documents/UVVZM22UN7tMUeiW_4ShTQ/content",
"rdna4": "https://docs.amd.com/api/khub/documents/uQpkEvk3pv~kfAb2x~j4uw/content",
"cdna": ["https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf",
"https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf"],
"cdna": "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf",
}
# Field type mappings and ordering
FIELD_TYPES = {'SSRC0': 'SSrc', 'SSRC1': 'SSrc', 'SOFFSET': 'SSrc', 'SADDR': 'SSrc', 'SRC0': 'Src', 'SRC1': 'Src', 'SRC2': 'Src',
'SDST': 'SGPRField', 'SBASE': 'SGPRField', 'SDATA': 'SGPRField', 'SRSRC': 'SGPRField', 'VDST': 'VGPRField', 'VSRC1': 'VGPRField',
'VDATA': 'VGPRField', 'VADDR': 'VGPRField', 'ADDR': 'VGPRField', 'DATA': 'VGPRField', 'DATA0': 'VGPRField', 'DATA1': 'VGPRField',
'SIMM16': 'SImm', 'OFFSET': 'Imm', 'OPX': 'VOPDOp', 'OPY': 'VOPDOp', 'SRCX0': 'Src', 'SRCY0': 'Src',
'VSRCX1': 'VGPRField', 'VSRCY1': 'VGPRField', 'VDSTX': 'VGPRField', 'VDSTY': 'VDSTYEnc'}
FIELD_ORDER = {
'SOP2': ['op', 'sdst', 'ssrc0', 'ssrc1'], 'SOP1': ['op', 'sdst', 'ssrc0'], 'SOPC': ['op', 'ssrc0', 'ssrc1'],
'SOPK': ['op', 'sdst', 'simm16'], 'SOPP': ['op', 'simm16'], 'VOP1': ['op', 'vdst', 'src0'], 'VOPC': ['op', 'src0', 'vsrc1'],
'VOP2': ['op', 'vdst', 'src0', 'vsrc1'], 'VOP3SD': ['op', 'vdst', 'sdst', 'src0', 'src1', 'src2', 'clmp'],
'SMEM': ['op', 'sdata', 'sbase', 'soffset', 'offset', 'glc', 'dlc'], 'DS': ['op', 'vdst', 'addr', 'data0', 'data1'],
'VOP3': ['op', 'vdst', 'src0', 'src1', 'src2', 'omod', 'neg', 'abs', 'clmp', 'opsel'],
'VOP3P': ['op', 'vdst', 'src0', 'src1', 'src2', 'neg', 'neg_hi', 'opsel', 'opsel_hi', 'clmp'],
'FLAT': ['op', 'vdst', 'addr', 'data', 'saddr', 'offset', 'seg', 'dlc', 'glc', 'slc'],
'MUBUF': ['op', 'vdata', 'vaddr', 'srsrc', 'soffset', 'offset', 'offen', 'idxen', 'glc', 'dlc', 'slc', 'tfe'],
'MTBUF': ['op', 'vdata', 'vaddr', 'srsrc', 'soffset', 'offset', 'format', 'offen', 'idxen', 'glc', 'dlc', 'slc', 'tfe'],
'MIMG': ['op', 'vdata', 'vaddr', 'srsrc', 'ssamp', 'dmask', 'dim', 'unrm', 'dlc', 'glc', 'slc'],
'EXP': ['en', 'target', 'vsrc0', 'vsrc1', 'vsrc2', 'vsrc3', 'done', 'row'],
'VINTERP': ['op', 'vdst', 'src0', 'src1', 'src2', 'waitexp', 'clmp', 'opsel', 'neg'],
'VOPD': ['opx', 'opy', 'vdstx', 'vdsty', 'srcx0', 'vsrcx1', 'srcy0', 'vsrcy1'],
'LDSDIR': ['op', 'vdst', 'attr', 'attr_chan', 'wait_va']}
SRC_EXTRAS = {233: 'DPP8', 234: 'DPP8FI', 250: 'DPP16', 251: 'VCCZ', 252: 'EXECZ', 254: 'LDS_DIRECT'}
FLOAT_MAP = {'0.5': 'POS_HALF', '-0.5': 'NEG_HALF', '1.0': 'POS_ONE', '-1.0': 'NEG_ONE', '2.0': 'POS_TWO', '-2.0': 'NEG_TWO',
'4.0': 'POS_FOUR', '-4.0': 'NEG_FOUR', '1/(2*PI)': 'INV_2PI', '0': 'ZERO'}
INST_PATTERN = re.compile(r'^([SVD]S?_[A-Z0-9_]+|(?:FLAT|GLOBAL|SCRATCH)_[A-Z0-9_]+)\s+(\d+)\s*$', re.M)
# ═══════════════════════════════════════════════════════════════════════════════
# PDF PARSING WITH PAGE CACHING
# Generic PDF extraction tools
# ═══════════════════════════════════════════════════════════════════════════════
class CachedPDF:
"""PDF wrapper with page text/table caching for faster repeated access."""
def __init__(self, pdf):
self._pdf, self._text_cache, self._table_cache = pdf, {}, {}
def __len__(self): return len(self._pdf.pages)
def text(self, i):
if i not in self._text_cache: self._text_cache[i] = self._pdf.pages[i].extract_text() or ''
return self._text_cache[i]
def tables(self, i):
if i not in self._table_cache: self._table_cache[i] = [t.extract() for t in self._pdf.pages[i].find_tables()]
return self._table_cache[i]
def extract(url: str) -> list[list[tuple[float, float, str, str]]]:
"""Extract positioned text from PDF. Returns list of text elements (x, y, text, font) per page."""
data = fetch(url).read_bytes()
def _parse_bits(s: str) -> tuple[int, int] | None:
return (int(m.group(1)), int(m.group(2) or m.group(1))) if (m := re.match(r'\[(\d+)(?::(\d+))?\]', s)) else None
# Parse xref table to locate objects
xref: dict[int, int] = {}
pos = int(re.search(rb'startxref\s+(\d+)', data).group(1)) + 4
while data[pos:pos+7] != b'trailer':
while data[pos:pos+1] in b' \r\n': pos += 1
line_end = data.find(b'\n', pos)
start_obj, count = map(int, data[pos:line_end].split()[:2])
pos = line_end + 1
for i in range(count):
if data[pos+17:pos+18] == b'n' and (off := int(data[pos:pos+10])) > 0: xref[start_obj + i] = off
pos += 20
def _parse_fields_table(table: list, fmt: str, enums: set[str]) -> list[tuple]:
fields = []
for row in table[1:]:
if not row or not row[0]: continue
name, bits_str = row[0].split('\n')[0].strip(), (row[1] or '').split('\n')[0].strip()
if not (bits := _parse_bits(bits_str)): continue
enc_val, hi, lo = None, bits[0], bits[1]
if name == 'ENCODING' and row[2]:
desc = row[2]
# Handle shared FLAT/GLOBAL/SCRATCH table: look for format-specific encoding
fmt_key = fmt.lstrip('V').lower().capitalize() # VFLAT -> Flat, VGLOBAL -> Global
if m := re.search(rf"{fmt_key}='b([01_]+)", desc):
enc_bits = m.group(1).replace('_', '')
elif m := re.search(r"(?:'b|Must be:\s*)([01_]+)", desc):
enc_bits = m.group(1).replace('_', '')
else:
enc_bits = None
if enc_bits:
enc_val, declared_width, actual_width = int(enc_bits, 2), hi - lo + 1, len(enc_bits)
if actual_width > declared_width: lo = hi - actual_width + 1
ftype = f"{fmt}Op" if name == 'OP' and f"{fmt}Op" in enums else FIELD_TYPES.get(name.upper())
fields.append((name, hi, lo, enc_val, ftype))
return fields
def get_stream(n: int) -> bytes:
obj = data[xref[n]:data.find(b'endobj', xref[n])]
raw = obj[obj.find(b'stream\n') + 7:obj.find(b'\nendstream')]
return zlib.decompress(raw) if b'/FlateDecode' in obj else raw
def _parse_single_pdf(url: str):
"""Parse a single PDF and return (formats, enums, src_enum, doc_name, instructions)."""
import pdfplumber
from tinygrad.helpers import fetch
# Find page content streams and extract text
pages = []
for n in sorted(xref):
if b'/Type /Page' not in data[xref[n]:xref[n]+500]: continue
if not (m := re.search(rb'/Contents (\d+) 0 R', data[xref[n]:xref[n]+500])): continue
stream = get_stream(int(m.group(1))).decode('latin-1')
elements, font = [], ''
for bt in re.finditer(r'BT(.*?)ET', stream, re.S):
x, y = 0.0, 0.0
for m in re.finditer(r'(/F[\d.]+) [\d.]+ Tf|([\d.+-]+) ([\d.+-]+) Td|[\d.+-]+ [\d.+-]+ [\d.+-]+ [\d.+-]+ ([\d.+-]+) ([\d.+-]+) Tm|<([0-9A-Fa-f]+)>.*?Tj|\[([^\]]+)\] TJ', bt.group(1)):
if m.group(1): font = m.group(1)
elif m.group(2): x, y = x + float(m.group(2)), y + float(m.group(3))
elif m.group(4): x, y = float(m.group(4)), float(m.group(5))
elif m.group(6) and (t := bytes.fromhex(m.group(6)).decode('latin-1')).strip(): elements.append((x, y, t, font))
elif m.group(7) and (t := ''.join(bytes.fromhex(h).decode('latin-1') for h in re.findall(r'<([0-9A-Fa-f]+)>', m.group(7)))).strip(): elements.append((x, y, t, font))
pages.append(sorted(elements, key=lambda e: (-e[1], e[0])))
return pages
pdf = CachedPDF(pdfplumber.open(fetch(url)))
total_pages = len(pdf)
def extract_tables(pages: list[list[tuple[float, float, str, str]]]) -> dict[int, tuple[str, list[list[str]]]]:
"""Extract numbered tables from PDF pages. Returns {table_num: (title, rows)} where rows is list of cells per row."""
def group_by_y(texts, key=lambda y: round(y)):
by_y: dict[int, list[tuple[float, float, str]]] = {}
for x, y, t, _ in texts:
by_y.setdefault(key(y), []).append((x, y, t))
return by_y
# Auto-detect document type
first_page = pdf.text(0)
is_cdna4, is_cdna3 = 'CDNA4' in first_page or 'CDNA 4' in first_page, 'CDNA3' in first_page or 'MI300' in first_page
is_cdna, is_rdna4 = is_cdna3 or is_cdna4, 'RDNA4' in first_page or 'RDNA 4' in first_page
is_rdna35, is_rdna3 = 'RDNA3.5' in first_page or 'RDNA 3.5' in first_page, 'RDNA3' in first_page and 'RDNA3.5' not in first_page
doc_name = "CDNA4" if is_cdna4 else "CDNA3" if is_cdna3 else "RDNA4" if is_rdna4 else "RDNA3.5" if is_rdna35 else "RDNA3" if is_rdna3 else "Unknown"
# Find all table headers by merging text on same line
table_positions = []
for page_idx, texts in enumerate(pages):
for items in group_by_y(texts).values():
line = ''.join(t for _, t in sorted((x, t) for x, _, t in items))
if m := re.search(r'Table (\d+)\. (.+)', line):
table_positions.append((int(m.group(1)), m.group(2).strip(), page_idx, items[0][1]))
table_positions.sort(key=lambda t: (t[2], -t[3]))
# Find Microcode Formats section (for formats/enums)
microcode_start = next((i for i in range(int(total_pages * 0.2), total_pages)
if re.search(r'\d+\.\d+\.\d+\.\s+SOP2\b|Chapter \d+\.\s+Microcode Formats', pdf.text(i))), int(total_pages * 0.9))
# Find Instructions section (for pseudocode)
instr_start = next((i for i in range(int(total_pages * 0.1), int(total_pages * 0.5))
if re.search(r'Chapter \d+\.\s+Instructions\b', pdf.text(i))), total_pages // 3)
instr_end = next((i for start in [int(total_pages * 0.6), int(total_pages * 0.5), instr_start]
for i in range(start, min(start + 100, total_pages))
if re.search(r'Chapter \d+\.\s+Microcode Formats', pdf.text(i))), total_pages)
# Parse src enum from SSRC encoding table
src_enum = dict(SRC_EXTRAS)
for i in range(microcode_start, min(microcode_start + 10, total_pages)):
text = pdf.text(i)
if 'SSRC0' in text and 'VCC_LO' in text:
for m in re.finditer(r'^(\d+)\s+(\S+)', text, re.M):
val, name = int(m.group(1)), m.group(2).rstrip('.:')
if name in FLOAT_MAP: src_enum[val] = FLOAT_MAP[name]
elif re.match(r'^[A-Z][A-Z0-9_]*$', name): src_enum[val] = name
# For each table, find rows with matching X positions
result: dict[int, tuple[str, list[list[str]]]] = {}
for num, title, start_page, header_y in table_positions:
rows, col_xs = [], None
for page_idx in range(start_page, len(pages)):
page_texts = [(x, y, t) for x, y, t, _ in pages[page_idx] if 30 < y < 760 and (page_idx > start_page or y < header_y)]
for items in sorted(group_by_y([(x, y, t, '') for x, y, t in page_texts], key=lambda y: round(y / 5)).values(), key=lambda items: -items[0][1]):
xs = tuple(sorted(round(x) for x, _, _ in items))
if col_xs is None:
if len(xs) < 2: continue # Skip single-column rows before table starts
col_xs = xs
elif len(xs) == 1 and xs[0] in col_xs: continue # Skip continuation rows at known column positions
elif not any(c in xs for c in col_xs[:2]): break # Row missing first columns = end of table
rows.append([t for _, t in sorted((x, t) for x, _, t in items)])
else: continue
break
if rows: result[num] = (title, rows)
return result
# Parse opcode tables
full_text = '\n'.join(pdf.text(i) for i in range(microcode_start, min(microcode_start + 50, total_pages)))
# ═══════════════════════════════════════════════════════════════════════════════
# AMD specific extraction
# ═══════════════════════════════════════════════════════════════════════════════
def extract_enums(tables: dict[int, tuple[str, list[list[str]]]]) -> dict[str, dict[int, str]]:
"""Extract all enums from tables. Returns {enum_name: {value: name}}."""
enums: dict[str, dict[int, str]] = {}
for m in re.finditer(r'Table \d+\. (\w+) Opcodes(.*?)(?=Table \d+\.|\n\d+\.\d+\.\d+\.\s+\w+\s*\nDescription|$)', full_text, re.S):
if ops := {int(x.group(1)): x.group(2) for x in re.finditer(r'(\d+)\s+([A-Z][A-Z0-9_]+)', m.group(2))}:
enums[m.group(1) + "Op"] = ops
if vopd_m := re.search(r'Table \d+\. VOPD Y-Opcodes\n(.*?)(?=Table \d+\.|15\.\d)', full_text, re.S):
if ops := {int(x.group(1)): x.group(2) for x in re.finditer(r'(\d+)\s+(V_DUAL_\w+)', vopd_m.group(1))}:
enums["VOPDOp"] = ops
enum_names = set(enums.keys())
for num, (title, rows) in tables.items():
# Opcode enums from "XXX Opcodes" tables
if m := re.match(r'(\w+) (?:Y-)?Opcodes', title):
fmt_name = 'VOPD' if 'Y-Opcodes' in title else m.group(1)
ops: dict[int, str] = {}
for row in rows:
for i in range(0, len(row) - 1, 2):
if row[i].isdigit() and re.match(r'^[A-Z][A-Z0-9_]+$', row[i + 1]):
ops[int(row[i])] = row[i + 1]
if ops: enums[fmt_name] = ops
# BufFmt from "Data Format" tables
if 'Data Format' in title:
for row in rows:
for i in range(0, len(row) - 1, 2):
if row[i].isdigit() and re.match(r'^[\dA-Z_]+$', row[i + 1]) and 'INVALID' not in row[i + 1]:
enums.setdefault('BufFmt', {})[int(row[i])] = row[i + 1]
return enums
# Parse instruction formats
def is_fields_table(t): return t and len(t) > 1 and t[0] and 'Field' in str(t[0][0] or '')
def has_encoding(fields): return any(f[0] == 'ENCODING' for f in fields)
def has_header_before_fields(text): return (pos := text.find('Field Name')) != -1 and bool(re.search(r'\d+\.\d+\.\d+\.\s+\w+\s*\n', text[:pos]))
def extract_ins(tables: dict[int, tuple[str, list[list[str]]]]) -> tuple[dict[str, list[tuple[str, int, int]]], dict[str, str]]:
  """Collect instruction formats and encoding bit strings from the '<NAME> Fields' tables.

  Returns (formats, encodings). formats maps a format name to its (field name lower-cased,
  hi bit, lo bit) tuples in row order. encodings maps a format name to the binary digit
  string read from the description of its 'encoding' row, with underscores removed.
  Tables with any other title, and tables in which no row carries a bit spec, are ignored.
  """
  formats: dict[str, list[tuple[str, int, int]]] = {}
  encodings: dict[str, str] = {}
  for title, rows in tables.values():
    title_match = re.match(r'(\w+) Fields$', title)
    if title_match is None: continue
    fmt_name = title_match.group(1)
    parsed = []
    for row in rows:
      if len(row) < 2: continue
      # Second cell is either a range "hi:lo" (brackets optional) or a single bit "[n]"
      bits = re.match(r'\[?(\d+):(\d+)\]?$', row[1]) or re.match(r'\[(\d+)\]$', row[1])
      if bits is None: continue
      field_name = row[0].lower()
      hi = int(bits.group(1))
      lo = int(bits.group(2)) if bits.lastindex >= 2 else hi
      if field_name == 'encoding' and len(row) >= 3:
        desc = row[2]
        if "'b" in desc:
          enc_bits = desc.split("'b")[-1].replace('_', '')
        else:
          enc = re.search(r':\s*([01_]+)', desc)
          enc_bits = enc.group(1).replace('_', '') if enc else None
        if enc_bits:
          # If the encoding has more bits than the declared field, grow the field downwards
          # to match (the original comment notes the AMD docs sometimes have this)
          if len(enc_bits) > hi - lo + 1: lo = hi - len(enc_bits) + 1
          encodings[fmt_name] = enc_bits
      parsed.append((field_name, hi, lo))
    if parsed: formats[fmt_name] = parsed
  return formats, encodings
format_headers = []
for i in range(50):
if microcode_start + i >= total_pages: break
text = pdf.text(microcode_start + i)
for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n?Description', text): format_headers.append((m.group(1), i, m.start()))
for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n', text):
fmt_name = m.group(1)
if is_cdna and fmt_name.isupper() and len(fmt_name) >= 2: format_headers.append((fmt_name, i, m.start()))
elif m.start() > len(text) - 200 and 'Description' not in text[m.end():] and i + 1 < 50:
next_text = pdf.text(microcode_start + i + 1).lstrip()
if next_text.startswith('Description') or (next_text.startswith('"RDNA') and 'Description' in next_text[:200]):
format_headers.append((fmt_name, i, m.start()))
# RDNA4: Look for "Table X. Y Fields" patterns (e.g., VIMAGE, VSAMPLE, or shared FLAT/GLOBAL/SCRATCH)
for m in re.finditer(r'Table \d+\.\s+([\w,\s]+?)\s+Fields', text):
table_name = m.group(1).strip()
# Handle shared table like "FLAT, GLOBAL and SCRATCH"
if ',' in table_name or ' and ' in table_name:
for part in re.split(r',\s*|\s+and\s+', table_name):
fmt_name = 'V' + part.strip()
if fmt_name not in [h[0] for h in format_headers]: format_headers.append((fmt_name, i, m.start()))
elif table_name.startswith('V'):
if table_name not in [h[0] for h in format_headers]: format_headers.append((table_name, i, m.start()))
def extract_pcode(pages: list[list[tuple[float, float, str, str]]], enums: dict[str, dict[int, str]]) -> dict[tuple[str, int], str]:
"""Extract pseudocode for instructions. Returns {(name, opcode): pseudocode}."""
# Build lookup from instruction name to opcode
name_to_op = {name: op for ops in enums.values() for op, name in ops.items()}
formats: dict[str, list] = {}
for fmt_name, rel_idx, header_pos in format_headers:
if fmt_name in formats: continue
page_idx = microcode_start + rel_idx
text = pdf.text(page_idx)
field_pos = text.find('Field Name', header_pos)
fields = None
for offset in range(3):
if page_idx + offset >= total_pages: break
if offset > 0 and has_header_before_fields(pdf.text(page_idx + offset)): break
for t in pdf.tables(page_idx + offset) if offset > 0 or field_pos > header_pos else []:
if is_fields_table(t) and (f := _parse_fields_table(t, fmt_name, enum_names)) and has_encoding(f): fields = f; break
if fields: break
if not fields and field_pos > header_pos:
for t in pdf.tables(page_idx):
if is_fields_table(t) and (f := _parse_fields_table(t, fmt_name, enum_names)): fields = f; break
if not fields: continue
field_names = {f[0] for f in fields}
for pg_offset in range(1, 3):
if page_idx + pg_offset >= total_pages or has_header_before_fields(pdf.text(page_idx + pg_offset)): break
for t in pdf.tables(page_idx + pg_offset):
if is_fields_table(t) and (extra := _parse_fields_table(t, fmt_name, enum_names)) and not has_encoding(extra):
for ef in extra:
if ef[0] not in field_names: fields.append(ef); field_names.add(ef[0])
break
formats[fmt_name] = fields
# First pass: find all instruction headers across all pages
all_instructions: list[tuple[int, float, str, int]] = [] # (page_idx, y, name, opcode)
for page_idx, page in enumerate(pages):
by_y: dict[int, list[tuple[float, str]]] = {}
for x, y, t, _ in page:
by_y.setdefault(round(y), []).append((x, t))
for y, items in sorted(by_y.items(), reverse=True):
left = [(x, t) for x, t in items if 55 < x < 65]
right = [(x, t) for x, t in items if 535 < x < 550]
if left and right and left[0][1] in name_to_op and right[0][1].isdigit():
all_instructions.append((page_idx, y, left[0][1], int(right[0][1])))
# Fix known PDF errors (RDNA-specific SMEM bit positions)
if 'SMEM' in formats and not is_cdna:
formats['SMEM'] = [(n, 13 if n == 'DLC' else 14 if n == 'GLC' else h, 13 if n == 'DLC' else 14 if n == 'GLC' else l, e, t)
for n, h, l, e, t in formats['SMEM']]
# RDNA4: VFLAT/VGLOBAL/VSCRATCH OP field is [20:14] not [20:13] (PDF documentation error)
for fmt_name in ['VFLAT', 'VGLOBAL', 'VSCRATCH']:
if fmt_name in formats:
formats[fmt_name] = [(n, h, 14 if n == 'OP' else l, e, t) for n, h, l, e, t in formats[fmt_name]]
if doc_name in ('RDNA3', 'RDNA3.5'):
if 'SOPPOp' in enums:
for k, v in {8: 'S_WAITCNT_DEPCTR', 58: 'S_TTRACEDATA', 59: 'S_TTRACEDATA_IMM'}.items():
assert k not in enums['SOPPOp']; enums['SOPPOp'][k] = v
if 'SOPKOp' in enums:
for k, v in {22: 'S_SUBVECTOR_LOOP_BEGIN', 23: 'S_SUBVECTOR_LOOP_END'}.items():
assert k not in enums['SOPKOp']; enums['SOPKOp'][k] = v
if 'SMEMOp' in enums:
for k, v in {34: 'S_ATC_PROBE', 35: 'S_ATC_PROBE_BUFFER'}.items():
assert k not in enums['SMEMOp']; enums['SMEMOp'][k] = v
if 'DSOp' in enums:
for k, v in {24: 'DS_GWS_SEMA_RELEASE_ALL', 25: 'DS_GWS_INIT', 26: 'DS_GWS_SEMA_V', 27: 'DS_GWS_SEMA_BR', 28: 'DS_GWS_SEMA_P', 29: 'DS_GWS_BARRIER'}.items():
assert k not in enums['DSOp']; enums['DSOp'][k] = v
if 'FLATOp' in enums:
for k, v in {40: 'GLOBAL_LOAD_ADDTID_B32', 41: 'GLOBAL_STORE_ADDTID_B32', 55: 'FLAT_ATOMIC_CSUB_U32'}.items():
assert k not in enums['FLATOp']; enums['FLATOp'][k] = v
# CDNA MTBUF: PDF is missing the FORMAT field (bits[25:19]) which is required for tbuffer_* instructions
if is_cdna and 'MTBUF' in formats:
field_names = {f[0] for f in formats['MTBUF']}
if 'FORMAT' not in field_names:
formats['MTBUF'].append(('FORMAT', 25, 19, None, None))
# CDNA SDWA/DPP: PDF only has modifier fields, need VOP1/VOP2 overlay for correct encoding
if is_cdna:
if 'SDWA' in formats:
formats['SDWA'] = [('ENCODING', 8, 0, 0xf9, None), ('VOP_OP', 16, 9, None, None), ('VDST', 24, 17, None, 'VGPRField'), ('VOP2_OP', 31, 25, None, None)] + \
[f for f in formats['SDWA'] if f[0] not in ('ENCODING', 'SDST', 'SD', 'ROW_MASK')]
if 'DPP' in formats:
formats['DPP'] = [('ENCODING', 8, 0, 0xfa, None), ('VOP_OP', 16, 9, None, None), ('VDST', 24, 17, None, 'VGPRField'), ('VOP2_OP', 31, 25, None, None),
('SRC0', 39, 32, None, 'Src'), ('DPP_CTRL', 48, 40, None, None), ('BOUND_CTRL', 51, 51, None, None), ('SRC0_NEG', 52, 52, None, None), ('SRC0_ABS', 53, 53, None, None),
('SRC1_NEG', 54, 54, None, None), ('SRC1_ABS', 55, 55, None, None), ('BANK_MASK', 59, 56, None, None), ('ROW_MASK', 63, 60, None, None)]
# Extract pseudocode for instructions
all_text = '\n'.join(pdf.text(i) for i in range(instr_start, instr_end))
matches = list(INST_PATTERN.finditer(all_text))
raw_pseudocode: dict[tuple[str, int], str] = {}
for i, match in enumerate(matches):
name, opcode = match.group(1), int(match.group(2))
start, end = match.end(), matches[i + 1].start() if i + 1 < len(matches) else match.end() + 2000
snippet = all_text[start:end].strip()
if pseudocode := _extract_pseudocode(snippet): raw_pseudocode[(name, opcode)] = pseudocode
# Extract unified buffer format table (RDNA only, for MTBUF format field)
buf_fmt = {}
if not is_cdna:
for i in range(total_pages):
for t in pdf.tables(i):
if t and len(t) > 2 and t[0] and '#' in str(t[0][0]) and 'Format' in str(t[0]):
for row in t[1:]:
for j in range(0, len(row) - 1, 3): # table has 3-column groups: #, Format, (empty)
if row[j] and row[j].isdigit() and row[j+1] and re.match(r'^[\d_]+_(UNORM|SNORM|USCALED|SSCALED|UINT|SINT|FLOAT)$', row[j+1]):
buf_fmt[int(row[j])] = row[j+1]
if buf_fmt: break
if buf_fmt: break
return {"formats": formats, "enums": enums, "src_enum": src_enum, "doc_name": doc_name, "pseudocode": raw_pseudocode, "is_cdna": is_cdna, "buf_fmt": buf_fmt}
def _extract_pseudocode(text: str) -> str | None:
"""Extract pseudocode from an instruction description snippet."""
lines, result, depth, in_lambda = text.split('\n'), [], 0, 0
for line in lines:
s = line.strip()
if not s or re.match(r'^\d+ of \d+$', s) or re.match(r'^\d+\.\d+\..*Instructions', s): continue
if s.startswith(('Notes', 'Functional examples', '', '-')): break # Stop at notes/bullets
if s.startswith(('"RDNA', 'AMD ', 'CDNA')): continue
if '' in s or '' in s: continue # Skip lines with bullets/dashes
if '= lambda(' in s: in_lambda += 1; continue
if in_lambda > 0:
if s.endswith(');'): in_lambda -= 1
continue
if s.startswith('if '): depth += 1
elif s.startswith('endif'): depth = max(0, depth - 1)
if s.endswith('.') and not any(p in s for p in ['D0', 'D1', 'S0', 'S1', 'S2', 'SCC', 'VCC', 'tmp', '=']): continue
if re.match(r'^[a-z].*\.$', s) and '=' not in s: continue
is_code = (any(p in s for p in ['D0.', 'D1.', 'S0.', 'S1.', 'S2.', 'SCC =', 'SCC ?', 'VCC', 'EXEC', 'tmp =', 'tmp[', 'lane =', 'PC =',
'D0[', 'D1[', 'S0[', 'S1[', 'S2[', 'MEM[', 'RETURN_DATA',
'VADDR', 'VDATA', 'VDST', 'SADDR', 'OFFSET']) or
s.startswith(('if ', 'else', 'elsif', 'endif', 'declare ', 'for ', 'endfor', '//')) or
re.match(r'^[a-z_]+\s*=', s) or re.match(r'^[a-z_]+\[', s) or (depth > 0 and '=' in s))
if is_code: result.append(s)
return '\n'.join(result) if result else None
def _merge_results(results: list[dict]) -> dict:
  """Combine several per-PDF parse results into one superset result.

  Values seen in more than one input must agree (checked with assert) for src_enum entries, enum
  members and the (hi, lo) bit range of same-named format fields. For pseudocode and buf_fmt the
  first input that provides a key wins.
  """
  out = {"formats": {}, "enums": {}, "src_enum": dict(SRC_EXTRAS), "doc_names": [], "pseudocode": {}, "is_cdna": False, "buf_fmt": {}}
  for parsed in results:
    out["doc_names"].append(parsed["doc_name"])
    out["is_cdna"] = out["is_cdna"] or parsed["is_cdna"]

    # source operand enum: new values are added, known values must carry the same name
    src_names = out["src_enum"]
    for value, label in parsed["src_enum"].items():
      if value not in src_names: src_names[value] = label
      else: assert src_names[value] == label

    # opcode enums: same rule, applied per enum class
    for enum_name, ops in parsed["enums"].items():
      members = out["enums"].setdefault(enum_name, {})
      for value, label in ops.items():
        if value not in members: members[value] = label
        else: assert members[value] == label

    # formats: the first document defines the field list, later documents may only add unseen fields
    for fmt_name, fields in parsed["formats"].items():
      if fmt_name not in out["formats"]:
        out["formats"][fmt_name] = list(fields)
        continue
      # snapshot taken once per document; it is not refreshed while this document's fields are appended
      known_ranges = {fld[0]: (fld[1], fld[2]) for fld in out["formats"][fmt_name]}
      for fld in fields:
        if fld[0] not in known_ranges: out["formats"][fmt_name].append(fld)
        else: assert known_ranges[fld[0]] == (fld[1], fld[2])

    for key, code in parsed["pseudocode"].items():
      out["pseudocode"].setdefault(key, code)
    for value, label in parsed.get("buf_fmt", {}).items():
      out["buf_fmt"].setdefault(value, label)
  return out
# Second pass: extract pseudocode between consecutive instructions
pcode: dict[tuple[str, int], str] = {}
for i, (page_idx, y, name, opcode) in enumerate(all_instructions):
# Get end boundary from next instruction
if i + 1 < len(all_instructions):
next_page, next_y = all_instructions[i + 1][0], all_instructions[i + 1][1]
else:
next_page, next_y = page_idx, 0
# Collect F6 text from current position to next instruction
lines = []
for p in range(page_idx, next_page + 1):
start_y = y if p == page_idx else 800
end_y = next_y if p == next_page else 0
lines.extend((p, y2, t) for x, y2, t, f in pages[p] if f in ('/F6.0', '/F7.0') and end_y < y2 < start_y)
if lines:
# Sort by page first, then by y descending within each page (higher y = earlier text in PDF)
pcode_lines = [t.replace('Ê', '').strip() for _, _, t in sorted(lines, key=lambda x: (x[0], -x[1]))]
if pcode_lines: pcode[(name, opcode)] = '\n'.join(pcode_lines)
return pcode
# ═══════════════════════════════════════════════════════════════════════════════
# CODE GENERATION
# Write autogen files
# ═══════════════════════════════════════════════════════════════════════════════
def _generate_enum_py(enums, src_enum, doc_name, buf_fmt=None) -> str:
"""Generate enum.py content (just enums, no dsl.py dependency)."""
def enum_lines(name, items): return [f"class {name}(IntEnum):"] + [f" {n} = {v}" for v, n in sorted(items.items())] + [""]
lines = [f"# autogenerated from AMD {doc_name} ISA PDF by pdf.py - do not edit", "from enum import IntEnum", ""]
lines += enum_lines("SrcEnum", src_enum) + sum([enum_lines(n, ops) for n, ops in sorted(enums.items())], [])
if buf_fmt: lines += enum_lines("BufFmt", {v: f"BUF_FMT_{n}" for v, n in buf_fmt.items() if 1 <= v <= 63})
return '\n'.join(lines)
def _generate_ins_py(formats, enums, src_enum, doc_name) -> str:
"""Generate ins.py content (instruction formats and helpers, imports dsl.py and enum.py)."""
def field_key(f, order): return order.index(f[0].lower()) if f[0].lower() in order else 1000
lines = [f"# autogenerated from AMD {doc_name} ISA PDF by pdf.py - do not edit",
"# ruff: noqa: F401,F403", "from typing import Annotated",
"from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, Inst96, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField",
"from extra.assembly.amd.autogen.{arch}.enum import *",
"import functools", ""]
format_defaults = {'VOP3P': {'opsel_hi': 3, 'opsel_hi2': 1}}
lines.append("# instruction formats")
# MIMG has optional NSA (Non-Sequential Address) fields that extend beyond 64 bits, but base encoding is 64-bit
inst64_override = {'MIMG'}
for fmt_name, fields in sorted(formats.items()):
max_bit = max(f[1] for f in fields)
if fmt_name in inst64_override: base = "Inst64"
else: base = "Inst96" if max_bit > 63 else "Inst64" if max_bit > 31 or fmt_name == 'VOP3SD' else "Inst32"
order = FIELD_ORDER.get(fmt_name, [])
lines.append(f"class {fmt_name}({base}):")
if enc := next((f for f in fields if f[0] == 'ENCODING'), None):
lines.append(f" encoding = bits[{enc[1]}:{enc[2]}] == 0b{enc[3]:b}" if enc[1] != enc[2] else f" encoding = bits[{enc[1]}] == {enc[3]}")
if defaults := format_defaults.get(fmt_name): lines.append(f" _defaults = {defaults}")
for name, hi, lo, _, ftype in sorted([f for f in fields if f[0] != 'ENCODING'], key=lambda f: field_key(f, order)):
ann = f":Annotated[BitField, {ftype}]" if ftype and ftype.endswith('Op') else f":{ftype}" if ftype else ""
lines.append(f" {name.lower()}{ann} = bits[{hi}]" if hi == lo else f" {name.lower()}{ann} = bits[{hi}:{lo}]")
def write_enums(enums: dict[str, dict[int, str]], arch: str, path: str):
  """Write enum.py to `path`, one IntEnum class per entry of `enums`.

  Class naming: 'Src' becomes SrcEnum, 'BufFmt' keeps its name and has every member prefixed with
  BUF_FMT_, any other name gets an 'Op' suffix. Classes are written in name order, members in value
  order. `arch` is accepted but not used here.
  """
  header = ["# autogenerated from AMD ISA PDF by pdf.py - do not edit", "from enum import IntEnum", ""]
  body = []
  for name, values in sorted(enums.items()):
    if name == 'Src': suffix, prefix = "Enum", ""
    elif name == 'BufFmt': suffix, prefix = "", "BUF_FMT_"
    else: suffix, prefix = "Op", ""
    body.append(f"class {name}{suffix}(IntEnum):")
    body.extend(f" {prefix}{member} = {val}" for val, member in sorted(values.items()))
    body.append("")
  with open(path, "w") as f:
    f.write("\n".join(header + body))
def write_ins(formats: dict[str, list[tuple[str, int, int]]], encodings: dict[str, str], enums: dict[str, dict[int, str]], arch: str, path: str):
"""Write ins.py file from extracted formats and enums."""
# Field types and ordering
def field_type(name, fmt):
if name == 'op' and fmt in enums: return f'Annotated[BitField, {fmt}Op]'
if name in ('opx', 'opy'): return 'Annotated[BitField, VOPDOp]'
if name == 'vdsty': return 'VDSTYEnc'
if name in ('vdst', 'vsrc1', 'vaddr', 'vdata', 'data', 'data0', 'data1', 'addr', 'vsrc0', 'vsrc2', 'vsrc3'): return 'VGPRField'
if name in ('sdst', 'sbase', 'sdata', 'srsrc', 'ssamp'): return 'SGPRField'
if name.startswith('ssrc') or name in ('saddr', 'soffset'): return 'SSrc'
if name in ('src0', 'srcx0', 'srcy0') or name.startswith('src') and name[3:].isdigit(): return 'Src'
if name.startswith('simm'): return 'SImm'
if name == 'offset' or name.startswith('imm'): return 'Imm'
return None
field_priority = ['encoding', 'op', 'opx', 'opy', 'vdst', 'vdstx', 'vdsty', 'sdst', 'vdata', 'sdata', 'addr', 'vaddr', 'data', 'data0', 'data1',
'src0', 'srcx0', 'srcy0', 'vsrc0', 'ssrc0', 'src1', 'vsrc1', 'vsrcx1', 'vsrcy1', 'ssrc1', 'src2', 'vsrc2', 'src3', 'vsrc3',
'saddr', 'sbase', 'srsrc', 'ssamp', 'soffset', 'offset', 'simm16', 'en', 'target', 'attr', 'attr_chan',
'omod', 'neg', 'neg_hi', 'abs', 'clmp', 'opsel', 'opsel_hi', 'waitexp', 'wait_va',
'dmask', 'dim', 'seg', 'format', 'offen', 'idxen', 'glc', 'dlc', 'slc', 'tfe', 'unrm', 'done', 'row']
def sort_fields(fields):
order = {name: i for i, name in enumerate(field_priority)}
return sorted(fields, key=lambda f: (order.get(f[0], 1000), f[2]))
# Generate format classes
lines = ["# autogenerated from AMD ISA PDF by pdf.py - do not edit", "# ruff: noqa: F401,F403",
"from typing import Annotated",
"from extra.assembly.amd.dsl import *",
f"from extra.assembly.amd.autogen.{arch}.enum import *", "import functools", ""]
for fmt_name, fields in sorted(formats.items()):
lines.append(f"class {fmt_name}(Inst):")
for name, hi, lo in sort_fields(fields):
bits_str = f"bits[{hi}:{lo}]" if hi != lo else f"bits[{hi}]"
if name == 'encoding' and fmt_name in encodings: lines.append(f" encoding = {bits_str} == 0b{encodings[fmt_name]}")
else:
ftype = field_type(name, fmt_name)
lines.append(f" {name}{f':{ftype}' if ftype else ''} = {bits_str}")
lines.append("")
# Generate instruction helpers
lines.append("# instruction helpers")
for cls_name, ops in sorted(enums.items()):
fmt = cls_name[:-2]
for op_val, name in sorted(ops.items()):
seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=1"}.get(fmt, "")
tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt, f"{fmt}, {cls_name}")
if fmt in formats or fmt in ("GLOBAL", "SCRATCH"):
suffix = "_e32" if fmt in ("VOP1", "VOP2", "VOPC") else "_e64" if fmt == "VOP3" and op_val < 512 else ""
if name in ('V_FMAMK_F32', 'V_FMAMK_F16'):
lines.append(f"def {name.lower()}{suffix}(vdst, src0, K, vsrc1): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)")
elif name in ('V_FMAAK_F32', 'V_FMAAK_F16'):
lines.append(f"def {name.lower()}{suffix}(vdst, src0, vsrc1, K): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)")
else: lines.append(f"{name.lower()}{suffix} = functools.partial({tgt}.{name}{seg})")
src_names = {name for _, name in src_enum.items()}
lines += [""] + [f"{name} = SrcEnum.{name}" for _, name in sorted(src_enum.items()) if name not in {'DPP8', 'DPP16'}]
if "NULL" in src_names: lines.append("OFF = NULL\n")
return '\n'.join(lines)
for fmt_name, ops in sorted(enums.items()):
seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=1"}.get(fmt_name, "")
tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt_name, f"{fmt_name}, {fmt_name}Op")
suffix = "_e32" if fmt_name in ("VOP1", "VOP2", "VOPC") else "_e64" if fmt_name == "VOP3" and len(ops) > 0 else ""
if fmt_name in formats or fmt_name in ("GLOBAL", "SCRATCH"):
for op_val, name in sorted(ops.items()):
fn_suffix = suffix if fmt_name != "VOP3" or op_val < 512 else ""
lines.append(f"{name.lower()}{fn_suffix} = functools.partial({tgt}.{name}{seg})")
def _generate_str_pcode_py(enums, pseudocode, arch) -> str:
"""Generate str_pcode.py content (raw pseudocode strings)."""
# Get op enums for this arch (import from .ins which re-exports from .enum)
import importlib
autogen = importlib.import_module(f"extra.assembly.amd.autogen.{arch}.ins")
OP_ENUMS = [getattr(autogen, name) for name in ['SOP1Op', 'SOP2Op', 'SOPCOp', 'SOPKOp', 'SOPPOp', 'SMEMOp', 'VOP1Op', 'VOP2Op', 'VOP3Op', 'VOP3SDOp', 'VOP3POp', 'VOPCOp', 'VOP3AOp', 'VOP3BOp', 'DSOp', 'FLATOp', 'GLOBALOp', 'SCRATCHOp'] if hasattr(autogen, name)]
with open(path, "w") as f:
f.write("\n".join(lines))
# Build defined ops mapping
defined_ops: dict[tuple, list] = {}
for enum_cls in OP_ENUMS:
for op in enum_cls:
if op.name.startswith(('S_', 'V_', 'DS_', 'FLAT_', 'GLOBAL_', 'SCRATCH_')): defined_ops.setdefault((op.name, op.value), []).append((enum_cls, op))
enum_names = [e.__name__ for e in OP_ENUMS]
instructions: dict = {cls: {} for cls in OP_ENUMS}
for key, pc in pseudocode.items():
if key in defined_ops:
for enum_cls, enum_val in defined_ops[key]: instructions[enum_cls][enum_val] = pc
# Build string dictionaries for each enum
lines = [f'''# autogenerated by pdf.py - do not edit
# to regenerate: python -m extra.assembly.amd.pdf --arch {arch}
# ruff: noqa: E501
from extra.assembly.amd.autogen.{arch}.enum import {", ".join(enum_names)}
''']
all_dict_entries: dict = {}
for enum_cls in OP_ENUMS:
cls_name = enum_cls.__name__
if not instructions.get(enum_cls): continue
dict_entries = [(op, repr(pc)) for op, pc in instructions[enum_cls].items()]
if dict_entries:
all_dict_entries[enum_cls] = dict_entries
lines.append(f'{cls_name}_PCODE = {{')
for op, escaped in dict_entries: lines.append(f" {cls_name}.{op.name}: {escaped},")
lines.append('}\n')
lines.append('PSEUDOCODE_STRINGS = {')
for enum_cls in OP_ENUMS:
if all_dict_entries.get(enum_cls): lines.append(f' {enum_cls.__name__}: {enum_cls.__name__}_PCODE,')
lines.append('}')
return '\n'.join(lines)
# ═══════════════════════════════════════════════════════════════════════════════
# MAIN GENERATION
# ═══════════════════════════════════════════════════════════════════════════════
def generate_arch(arch: str) -> dict:
  """Parse the PDF(s) registered for `arch` and write enum.py, ins.py and str_pcode.py for it.

  Files go to extra/assembly/amd/autogen/<arch>/ (created if missing, together with __init__.py).
  Returns the parse result; when the arch has more than one PDF it is the merged result.
  """
  sources = PDF_URLS[arch]
  if isinstance(sources, str): sources = [sources]
  print(f"\n{'='*60}\nGenerating {arch}...")
  print(f"Parsing {len(sources)} PDF(s)...")
  parsed = [_parse_single_pdf(src) for src in sources]
  if len(parsed) > 1:
    merged = _merge_results(parsed)
    doc_name = "+".join(merged["doc_names"])
  else:
    merged = parsed[0]
    doc_name = merged["doc_name"]

  out_dir = Path(f"extra/assembly/amd/autogen/{arch}")
  out_dir.mkdir(parents=True, exist_ok=True)
  (out_dir / "__init__.py").touch()

  # enum.py: enums only, no dsl.py dependency
  enum_file = out_dir / "enum.py"
  enum_file.write_text(_generate_enum_py(merged["enums"], merged["src_enum"], doc_name, merged.get("buf_fmt")))
  n_buf_fmt = sum(1 for v in merged.get("buf_fmt", {}) if 1 <= v <= 63)
  summary = f"Generated {enum_file}: SrcEnum ({len(merged['src_enum'])}) + {len(merged['enums'])} enums"
  if n_buf_fmt: summary += f" + BufFmt ({n_buf_fmt})"
  print(summary)

  # ins.py: instruction formats and helpers; the generator leaves an "{arch}" placeholder to fill in
  ins_file = out_dir / "ins.py"
  ins_text = _generate_ins_py(merged["formats"], merged["enums"], merged["src_enum"], doc_name)
  ins_file.write_text(ins_text.replace("{arch}", arch))
  print(f"Generated {ins_file}: {len(merged['formats'])} formats")

  # str_pcode.py: generated last because its generator imports the files written above
  pcode_file = out_dir / "str_pcode.py"
  pcode_file.write_text(_generate_str_pcode_py(merged["enums"], merged["pseudocode"], arch))
  print(f"Generated {pcode_file}: {len(merged['pseudocode'])} instructions")
  return merged
def _generate_arch_wrapper(arch: str):
  """Run generate_arch for `arch` and hand back the arch name instead of the parse result.

  Used as the callable for the process pool in generate_all.
  """
  _ = generate_arch(arch)  # result intentionally discarded; only the name is returned
  return arch
def generate_all():
  """Generate the autogen files for every architecture in PDF_URLS using a process pool."""
  with ProcessPoolExecutor() as pool:
    # drain the iterator so every task finishes and any worker exception is re-raised here
    for _ in pool.map(_generate_arch_wrapper, PDF_URLS.keys()):
      pass
def write_pcode(pcode: dict[tuple[str, int], str], enums: dict[str, dict[int, str]], arch: str, path: str):
  """Write str_pcode.py to `path`.

  For every (name, opcode) in `enums` that has an entry in `pcode`, the pseudocode string is emitted
  into a `<Fmt>Op_PCODE` dict keyed by the enum member; a final PSEUDOCODE_STRINGS dict maps each
  enum class to its dict. Enum classes are written in name order, entries in opcode order.
  """
  grouped: dict[str, list[tuple[str, int, str]]] = {}
  for fmt_name, ops in enums.items():
    for opcode, name in ops.items():
      key = (name, opcode)
      if key not in pcode: continue
      grouped.setdefault(f"{fmt_name}Op", []).append((name, opcode, pcode[key]))

  enum_names = sorted(grouped)
  out = ["# autogenerated by pdf.py - do not edit", "# to regenerate: python -m extra.assembly.amd.pdf",
         "# ruff: noqa: E501", f"from extra.assembly.amd.autogen.{arch}.enum import {', '.join(enum_names)}", ""]
  for enum_name in enum_names:
    out.append(f"{enum_name}_PCODE = {{")
    entries = sorted(grouped[enum_name], key=lambda entry: entry[1])
    out.extend(f" {enum_name}.{name}: {code!r}," for name, _, code in entries)
    out.append("}\n")
  mapping = ', '.join(f'{e}: {e}_PCODE' for e in enum_names)
  out.append(f"PSEUDOCODE_STRINGS = {{{mapping}}}")
  with open(path, "w") as f:
    f.write("\n".join(out))
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Generate AMD ISA autogen files from PDF documentation")
parser.add_argument("--arch", choices=list(PDF_URLS.keys()) + ["all"], default="rdna3")
args = parser.parse_args()
if args.arch == "all": generate_all()
else: generate_arch(args.arch)
import pathlib
for arch, url in PDF_URLS.items():
print(f"Processing {arch}...")
pages = extract(url)
tables = extract_tables(pages)
enums = extract_enums(tables)
formats, encodings = extract_ins(tables)
pcode = extract_pcode(pages, enums)
# Fix known PDF errors
if arch == 'rdna3':
fixes = {'SOPP': {8: 'S_WAITCNT_DEPCTR', 58: 'S_TTRACEDATA', 59: 'S_TTRACEDATA_IMM'},
'SOPK': {22: 'S_SUBVECTOR_LOOP_BEGIN', 23: 'S_SUBVECTOR_LOOP_END'},
'SMEM': {34: 'S_ATC_PROBE', 35: 'S_ATC_PROBE_BUFFER'},
'DS': {24: 'DS_GWS_SEMA_RELEASE_ALL', 25: 'DS_GWS_INIT', 26: 'DS_GWS_SEMA_V', 27: 'DS_GWS_SEMA_BR', 28: 'DS_GWS_SEMA_P', 29: 'DS_GWS_BARRIER'},
'FLAT': {40: 'GLOBAL_LOAD_ADDTID_B32', 41: 'GLOBAL_STORE_ADDTID_B32', 55: 'FLAT_ATOMIC_CSUB_U32'}}
for fmt, ops in fixes.items(): enums[fmt] = merge_dicts([enums[fmt], ops])
if arch in ('rdna3', 'rdna4'):
# RDNA SMEM: PDF says DLC=[14], GLC=[16] but hardware uses DLC=[13], GLC=[14]
if 'SMEM' in formats:
formats['SMEM'] = [(n, 13 if n == 'dlc' else 14 if n == 'glc' else h, 13 if n == 'dlc' else 14 if n == 'glc' else l)
for n, h, l in formats['SMEM']]
if arch == 'cdna':
# CDNA DS: PDF is missing the GDS field (bit 16)
if 'DS' in formats and not any(n == 'gds' for n, _, _ in formats['DS']):
formats['DS'].append(('gds', 16, 16))
# CDNA DPP/SDWA: PDF only documents modifier fields (bits[63:32]), need to add VOP overlay fields (bits[31:0])
vop_overlay = [('encoding', 8, 0), ('vop_op', 16, 9), ('vdst', 24, 17), ('vop2_op', 31, 25)]
if 'DPP' in formats and not any(n == 'encoding' for n, _, _ in formats['DPP']):
formats['DPP'] = vop_overlay + [('bc' if n == 'bound_ctrl' else n, h, l) for n, h, l in formats['DPP']]
encodings['DPP'] = '11111010'
if 'SDWA' in formats and not any(n == 'encoding' for n, _, _ in formats['SDWA']):
formats['SDWA'] = vop_overlay + [(n, h, l) for n, h, l in formats['SDWA']]
encodings['SDWA'] = '11111001'
base = pathlib.Path(__file__).parent / "autogen" / arch
write_enums(enums, arch, base / "enum.py")
write_ins(formats, encodings, enums, arch, base / "ins.py")
write_pcode(pcode, enums, arch, base / "str_pcode.py")
print(f" {len(tables)} tables, {len(pcode)} pcode -> {base}")

View File

@@ -1615,7 +1615,7 @@ class TestCarryBorrow(unittest.TestCase):
v_mov_b32_e32(v[2], s[2]),
v_mov_b32_e32(v[3], s[3]),
v_add_co_u32(v[4], VCC, v[0], v[2]),
v_add_co_ci_u32_e32(v[5], VCC, v[1], v[3]),
v_add_co_ci_u32_e32(v[5], v[1], v[3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vgpr[0][4], 0x00000000, "lo result")

View File

@@ -271,7 +271,7 @@ class TestVOP3P(unittest.TestCase):
s_mov_b32(s[1], 0x44004200), # hi=4.0, lo=3.0
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_pk_add_f16(v[2], v[0], v[1]),
v_pk_add_f16(v[2], v[0], v[1], opsel_hi=3, opsel_hi2=1),
]
st = run_program(instructions, n_lanes=1)
result = st.vgpr[0][2]
@@ -288,7 +288,7 @@ class TestVOP3P(unittest.TestCase):
s_mov_b32(s[1], 0x45004400), # hi=5.0, lo=4.0
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_pk_mul_f16(v[2], v[0], v[1]),
v_pk_mul_f16(v[2], v[0], v[1], opsel_hi=3, opsel_hi2=1),
]
st = run_program(instructions, n_lanes=1)
result = st.vgpr[0][2]
@@ -307,7 +307,7 @@ class TestVOP3P(unittest.TestCase):
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_mov_b32_e32(v[2], s[2]),
v_pk_fma_f16(v[3], v[0], v[1], v[2]),
v_pk_fma_f16(v[3], v[0], v[1], v[2], opsel_hi=3, opsel_hi2=1),
]
st = run_program(instructions, n_lanes=1)
result = st.vgpr[0][3]
@@ -325,7 +325,7 @@ class TestVOP3P(unittest.TestCase):
instructions = [
s_mov_b32(s[0], 0x3c003c00), # packed f16: hi=1.0, lo=1.0
v_mov_b32_e32(v[0], s[0]),
v_pk_add_f16(v[1], v[0], SrcEnum.POS_ONE), # Add inline constant 1.0
v_pk_add_f16(v[1], v[0], SrcEnum.POS_ONE, opsel_hi=3, opsel_hi2=1), # Add inline constant 1.0
]
st = run_program(instructions, n_lanes=1)
result = st.vgpr[0][1]
@@ -345,7 +345,7 @@ class TestVOP3P(unittest.TestCase):
instructions = [
s_mov_b32(s[0], 0x44004200), # packed f16: hi=4.0, lo=3.0
v_mov_b32_e32(v[0], s[0]),
v_pk_mul_f16(v[1], v[0], SrcEnum.POS_TWO),
v_pk_mul_f16(v[1], v[0], SrcEnum.POS_TWO, opsel_hi=3, opsel_hi2=1),
]
st = run_program(instructions, n_lanes=1)
result = st.vgpr[0][1]
@@ -486,12 +486,12 @@ class TestSpecialOps(unittest.TestCase):
"""V_DOT2_F32_BF16 computes dot product of bf16 pairs."""
# bf16 1.0 = 0x3f80, bf16 2.0 = 0x4000
instructions = [
s_mov_b32(s[0], 0x3f803f80), # packed bf16: 1.0, 1.0
s_mov_b32(s[1], 0x40003f80), # packed bf16: 2.0, 1.0
s_mov_b32(s[0], 0x3f803f80), # packed bf16: lo=1.0, hi=1.0
s_mov_b32(s[1], 0x40003f80), # packed bf16: lo=1.0, hi=2.0
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_mov_b32_e32(v[2], 0),
v_dot2_f32_bf16(v[3], v[0], v[1], v[2]),
v_dot2_f32_bf16(v[3], v[0], v[1], v[2], opsel_hi=3, opsel_hi2=1),
]
st = run_program(instructions, n_lanes=1)
# 1.0*1.0 + 1.0*2.0 + 0 = 3.0
@@ -510,7 +510,7 @@ class TestPackedMixedSigns(unittest.TestCase):
s_mov_b32(s[1], 0x3c003c00), # packed: hi=1.0, lo=1.0
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_pk_add_f16(v[2], v[0], v[1]),
v_pk_add_f16(v[2], v[0], v[1], opsel_hi=3, opsel_hi2=1),
]
st = run_program(instructions, n_lanes=1)
result = st.vgpr[0][2]

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
"""Test pdf.py PDF parser and enum generation."""
import unittest, tempfile, importlib.util
from extra.assembly.amd.pdf import extract, extract_tables, extract_enums, write_enums, PDF_URLS
EXPECTED = {
"rdna3": {"pages": 655, "tables": 115, "sop2_ops": 67, "sop2_first": "S_ADD_U32"},
"rdna4": {"pages": 711, "tables": 125, "sop2_ops": 74, "sop2_first": "S_ADD_CO_U32"},
"cdna": {"pages": 610, "tables": 104, "sop2_ops": 52, "sop2_first": "S_ADD_U32"},
}
class TestPDF2(unittest.TestCase):
  """Checks of the PDF extractor against the counts recorded in EXPECTED.

  Every PDF in PDF_URLS is extracted once in setUpClass and the results are shared by all tests.
  """
  @classmethod
  def setUpClass(cls):
    cls.data = {name: extract(url) for name, url in PDF_URLS.items()}
    cls.tables = {name: extract_tables(pages) for name, pages in cls.data.items()}
    cls.enums = {name: extract_enums(cls.tables[name]) for name in PDF_URLS}

  def test_page_counts(self):
    for name, exp in EXPECTED.items():
      self.assertEqual(len(self.data[name]), exp["pages"], f"{name} page count")

  def test_table_counts(self):
    for name, exp in EXPECTED.items():
      self.assertEqual(len(self.tables[name]), exp["tables"], f"{name} table count")

  def test_tables_sequential(self):
    # table numbers must form a gap-free run starting at 1
    for name in PDF_URLS:
      nums = sorted(self.tables[name].keys())
      missing = set(range(1, max(nums) + 1)) - set(nums)
      self.assertEqual(missing, set(), f"{name} missing tables: {missing}")

  def test_generate_enums(self):
    import os
    for name, exp in EXPECTED.items():
      # A temporary directory is removed on exit together with anything written next to the
      # generated module. NamedTemporaryFile(delete=False) left one file behind per run and was
      # reopened by path while its handle was still open.
      with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "enum.py")
        write_enums(self.enums[name], name, path)
        spec = importlib.util.spec_from_file_location("enum", path)
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
      # Check SOP2Op
      self.assertTrue(hasattr(mod, 'SOP2Op'), f"{name} missing SOP2Op")
      self.assertEqual(len(mod.SOP2Op), exp["sop2_ops"], f"{name} SOP2Op count")
      self.assertEqual(mod.SOP2Op(0).name, exp["sop2_first"], f"{name} SOP2Op first")
      # Check all enums have at least 2 ops
      for attr in dir(mod):
        if attr.endswith('Op'):
          self.assertGreaterEqual(len(getattr(mod, attr)), 2, f"{name} {attr} has too few ops")
if __name__ == "__main__":
unittest.main()

View File

@@ -1,150 +0,0 @@
#!/usr/bin/env python3
"""Test that PDF parser correctly extracts format fields."""
import unittest, os
from extra.assembly.amd.autogen.rdna3.ins import SOP1, SOP2, SOPK, SOPP, VOP1, VOP2, VOP3SD, VOPC, FLAT, VOPD, SOP1Op, SOP2Op, VOP1Op, VOP3Op
# expected formats with key fields and whether they have ENCODING
EXPECTED_FORMATS = {
'DPP16': (['SRC0', 'DPP_CTRL', 'BANK_MASK', 'ROW_MASK'], False),
'DPP8': (['SRC0', 'LANE_SEL0', 'LANE_SEL7'], False),
'DS': (['OP', 'ADDR', 'DATA0', 'DATA1', 'VDST'], True),
'EXP': (['EN', 'TARGET', 'VSRC0', 'VSRC1', 'VSRC2', 'VSRC3'], True),
'FLAT': (['OP', 'ADDR', 'DATA', 'SADDR', 'VDST', 'OFFSET'], True),
'LDSDIR': (['VDST', 'OP'], True),
'MIMG': (['OP', 'VADDR', 'VDATA', 'SRSRC', 'DMASK'], True),
'MTBUF': (['OP', 'VADDR', 'VDATA', 'SRSRC', 'FORMAT', 'SOFFSET'], True),
'MUBUF': (['OP', 'VADDR', 'VDATA', 'SRSRC', 'SOFFSET'], True),
'SMEM': (['OP', 'SBASE', 'SDATA', 'OFFSET', 'SOFFSET'], True),
'SOP1': (['OP', 'SDST', 'SSRC0'], True),
'SOP2': (['OP', 'SDST', 'SSRC0', 'SSRC1'], True),
'SOPC': (['OP', 'SSRC0', 'SSRC1'], True),
'SOPK': (['OP', 'SDST', 'SIMM16'], True),
'SOPP': (['OP', 'SIMM16'], True),
'VINTERP': (['OP', 'VDST', 'SRC0', 'SRC1', 'SRC2'], True),
'VOP1': (['OP', 'VDST', 'SRC0'], True),
'VOP2': (['OP', 'VDST', 'SRC0', 'VSRC1'], True),
'VOP3': (['OP', 'VDST', 'SRC0', 'SRC1', 'SRC2'], True),
'VOP3P': (['OP', 'VDST', 'SRC0', 'SRC1', 'SRC2'], True),
'VOP3SD': (['OP', 'VDST', 'SDST', 'SRC0', 'SRC1', 'SRC2'], True),
'VOPC': (['OP', 'SRC0', 'VSRC1'], True),
'VOPD': (['OPX', 'OPY', 'SRCX0', 'SRCY0', 'VDSTX', 'VDSTY'], True),
}
# PDF parsing is slow (~5s) and only matters when regenerating autogen/, so this class is opt-in:
# it runs only when TEST_PDF_PARSER=1 is set.
@unittest.skipUnless(os.environ.get("TEST_PDF_PARSER"), "set TEST_PDF_PARSER=1 to run PDF parser tests")
class TestPDFParserGenerate(unittest.TestCase):
  """Runs generate() once and validates the parsed formats."""
  def test_pdf_parser(self):
    """One test covering every check, so the PDF is parsed a single time."""
    from extra.assembly.amd.dsl import generate
    formats = generate()["formats"]

    def names_of(fmt_name): return {f[0] for f in formats.get(fmt_name, [])}

    # every expected format was found, and nothing beyond the 23 known ones
    for fmt_name in EXPECTED_FORMATS:
      self.assertIn(fmt_name, formats, f"missing format {fmt_name}")
    self.assertEqual(len(formats), 23)

    # no format lists the same field twice
    for fmt_name, fields in formats.items():
      field_names = [f[0] for f in fields]
      self.assertEqual(len(field_names), len(set(field_names)), f"{fmt_name} has duplicate fields: {field_names}")

    # key fields are present and ENCODING appears exactly where expected
    for fmt_name, (expected_fields, has_encoding) in EXPECTED_FORMATS.items():
      present = names_of(fmt_name)
      for field in expected_fields:
        self.assertIn(field, present, f"{fmt_name} missing {field}")
      check = self.assertIn if has_encoding else self.assertNotIn
      verb = "should" if has_encoding else "should not"
      check("ENCODING", present, f"{fmt_name} {verb} have ENCODING")

    # fields that belong to another format must not show up in these formats
    foreign_fields = [("VOPD", ['DPP_CTRL', 'BANK_MASK', 'ROW_MASK']),
                      ("DPP16", ['VDST', 'WAITEXP']),
                      ("SOPP", ['SBASE', 'SDATA'])]
    for fmt_name, banned in foreign_fields:
      present = names_of(fmt_name)
      for field in banned:
        self.assertNotIn(field, present, f"{fmt_name} should not have {field}")
class TestPDFParser(unittest.TestCase):
  """Check the field layout and encodings of the format classes produced from PDF parsing."""

  def test_sop2_fields(self):
    """SOP2 has op, sdst, ssrc0, ssrc1; op spans bits 29..23."""
    for name in ('op', 'sdst', 'ssrc0', 'ssrc1'):
      self.assertIn(name, SOP2._fields)
    op = SOP2._fields['op']
    self.assertEqual(op.hi, 29)
    self.assertEqual(op.lo, 23)

  def test_sop1_fields(self):
    """SOP1 has op, sdst, ssrc0 (and no simm16); ssrc0 spans bits 7..0; encoding is checked too."""
    for name in ('op', 'sdst', 'ssrc0'):
      self.assertIn(name, SOP1._fields)
    self.assertNotIn('simm16', SOP1._fields)
    ssrc0 = SOP1._fields['ssrc0']
    self.assertEqual(ssrc0.hi, 7)
    self.assertEqual(ssrc0.lo, 0)
    assert SOP1._encoding is not None
    encoding = SOP1._encoding
    self.assertEqual(encoding[0].hi, 31)
    self.assertEqual(encoding[1], 0b101111101)

  def test_vop3sd_fields(self):
    """VOP3SD has every field, including src0/src1/src2 (noted as coming from a page continuation)."""
    for name in ('op', 'vdst', 'sdst', 'src0', 'src1', 'src2'):
      self.assertIn(name, VOP3SD._fields)
    src0 = VOP3SD._fields['src0']
    self.assertEqual(src0.hi, 40)
    self.assertEqual(src0.lo, 32)
    self.assertEqual(VOP3SD._size(), 8)

  def test_flat_has_vdst(self):
    """FLAT has a vdst field spanning bits 63..56."""
    self.assertIn('vdst', FLAT._fields)
    vdst = FLAT._fields['vdst']
    self.assertEqual(vdst.hi, 63)
    self.assertEqual(vdst.lo, 56)

  def test_encoding_bits(self):
    """Encoding bit range and value match for the major formats."""
    # (format class, encoding hi bit, encoding lo bit, encoding value)
    expected = (
      (SOP2, 31, 30, 0b10),
      (SOPK, 31, 28, 0b1011),
      (SOPP, 31, 23, 0b101111111),
      (VOP1, 31, 25, 0b0111111),
      (VOP2, 31, 31, 0b0),
      (VOPC, 31, 25, 0b0111110),
      (FLAT, 31, 26, 0b110111),
    )
    for cls, hi, lo, val in expected:
      assert cls._encoding is not None
      encoding = cls._encoding
      self.assertEqual(encoding[0].hi, hi, f"{cls.__name__} encoding hi")
      self.assertEqual(encoding[0].lo, lo, f"{cls.__name__} encoding lo")
      self.assertEqual(encoding[1], val, f"{cls.__name__} encoding val")

  def test_opcode_enums_exist(self):
    """Opcode enums are generated and each has more members than a minimum count."""
    for enum_cls, minimum in ((SOP1Op, 50), (SOP2Op, 50), (VOP1Op, 50), (VOP3Op, 200)):
      self.assertGreater(len(enum_cls), minimum)

  def test_vopd_no_duplicate_fields(self):
    """VOPD has no duplicate fields, has its own X/Y fields, and has none of the DPP16 fields."""
    field_names = [*VOPD._fields.keys()]
    self.assertEqual(len(field_names), len(set(field_names)))
    for name in ('srcx0', 'srcy0', 'opx', 'opy'):
      self.assertIn(name, VOPD._fields)
    for name in ('dpp_ctrl', 'bank_mask', 'row_mask'):
      self.assertNotIn(name, VOPD._fields)
# Run the unittest runner when this file is executed directly.
if __name__ == "__main__":
  unittest.main()