mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
assembly/amd: make pdf.py code shine (#14029)
* assembly/amd: make pdf.py code shine * no merge * pdf2 is the future * something * regen enums * test * work * remove junk * write * pcode extraction * pdf2 passes all tests * simplify * simpler pdf * late filter * remove hacks * simplify pdf2.py * field type * remove defaults * don't export srcenum * simple pdf.py * simpler * cleaner * less hack in PDF
This commit is contained in:
@@ -1,46 +1,6 @@
|
||||
# autogenerated from AMD CDNA3+CDNA4 ISA PDF by pdf.py - do not edit
|
||||
# autogenerated from AMD ISA PDF by pdf.py - do not edit
|
||||
from enum import IntEnum
|
||||
|
||||
class SrcEnum(IntEnum):
|
||||
S_ADD_U32 = 0
|
||||
S_SUB_U32 = 1
|
||||
S_ADD_I32 = 2
|
||||
S_SUB_I32 = 3
|
||||
S_ADDC_U32 = 4
|
||||
S_SUBB_U32 = 5
|
||||
S_MIN_I32 = 6
|
||||
FLAT_SCRATCH_LO = 102
|
||||
FLAT_SCRATCH_HI = 103
|
||||
XNACK_MASK_LO = 104
|
||||
XNACK_MASK_HI = 105
|
||||
VCC_LO = 106
|
||||
VCC_HI = 107
|
||||
M0 = 124
|
||||
EXEC_LO = 126
|
||||
EXEC_HI = 127
|
||||
ZERO = 128
|
||||
DPP8 = 233
|
||||
DPP8FI = 234
|
||||
SHARED_BASE = 235
|
||||
SHARED_LIMIT = 236
|
||||
PRIVATE_BASE = 237
|
||||
PRIVATE_LIMIT = 238
|
||||
RESERVED = 239
|
||||
POS_HALF = 240
|
||||
NEG_HALF = 241
|
||||
POS_ONE = 242
|
||||
NEG_ONE = 243
|
||||
POS_TWO = 244
|
||||
NEG_TWO = 245
|
||||
POS_FOUR = 246
|
||||
NEG_FOUR = 247
|
||||
INV_2PI = 248
|
||||
DPP16 = 250
|
||||
VCCZ = 251
|
||||
EXECZ = 252
|
||||
SCC = 253
|
||||
LDS_DIRECT = 254
|
||||
|
||||
class DSOp(IntEnum):
|
||||
DS_ADD_U32 = 0
|
||||
DS_SUB_U32 = 1
|
||||
@@ -155,12 +115,6 @@ class DSOp(IntEnum):
|
||||
DS_READ2ST64_B64 = 120
|
||||
DS_ADD_RTN_F64 = 124
|
||||
DS_CONDXCHG32_RTN_B64 = 126
|
||||
DS_GWS_SEMA_RELEASE_ALL = 152
|
||||
DS_GWS_INIT = 153
|
||||
DS_GWS_SEMA_V = 154
|
||||
DS_GWS_SEMA_BR = 155
|
||||
DS_GWS_SEMA_P = 156
|
||||
DS_GWS_BARRIER = 157
|
||||
DS_READ_ADDTID_B32 = 182
|
||||
DS_PK_ADD_RTN_F16 = 183
|
||||
DS_PK_ADD_RTN_BF16 = 184
|
||||
@@ -174,7 +128,6 @@ class DSOp(IntEnum):
|
||||
DS_READ_B64_TR_B16 = 227
|
||||
DS_READ_B96 = 254
|
||||
DS_READ_B128 = 255
|
||||
CDNA4 = 600
|
||||
|
||||
class FLATOp(IntEnum):
|
||||
FLAT_LOAD_UBYTE = 16
|
||||
@@ -231,7 +184,6 @@ class FLATOp(IntEnum):
|
||||
FLAT_ATOMIC_XOR_X2 = 106
|
||||
FLAT_ATOMIC_INC_X2 = 107
|
||||
FLAT_ATOMIC_DEC_X2 = 108
|
||||
CDNA4 = 600
|
||||
|
||||
class GLOBALOp(IntEnum):
|
||||
GLOBAL_LOAD_UBYTE = 16
|
||||
@@ -295,7 +247,6 @@ class GLOBALOp(IntEnum):
|
||||
GLOBAL_ATOMIC_DEC_X2 = 108
|
||||
GLOBAL_LOAD_LDS_DWORDX4 = 125
|
||||
GLOBAL_LOAD_LDS_DWORDX3 = 126
|
||||
CDNA4 = 600
|
||||
|
||||
class MTBUFOp(IntEnum):
|
||||
TBUFFER_LOAD_FORMAT_X = 0
|
||||
@@ -390,7 +341,6 @@ class MUBUFOp(IntEnum):
|
||||
BUFFER_ATOMIC_XOR_X2 = 106
|
||||
BUFFER_ATOMIC_INC_X2 = 107
|
||||
BUFFER_ATOMIC_DEC_X2 = 108
|
||||
CDNA4 = 600
|
||||
|
||||
class SCRATCHOp(IntEnum):
|
||||
SCRATCH_LOAD_UBYTE = 16
|
||||
@@ -504,7 +454,6 @@ class SMEMOp(IntEnum):
|
||||
S_ATOMIC_XOR_X2 = 170
|
||||
S_ATOMIC_INC_X2 = 171
|
||||
S_ATOMIC_DEC_X2 = 172
|
||||
CDNA4 = 600
|
||||
|
||||
class SOP1Op(IntEnum):
|
||||
S_MOV_B32 = 0
|
||||
@@ -561,7 +510,6 @@ class SOP1Op(IntEnum):
|
||||
S_ANDN1_WREXEC_B64 = 53
|
||||
S_ANDN2_WREXEC_B64 = 54
|
||||
S_BITREPLICATE_B64_B32 = 55
|
||||
CDNA4 = 600
|
||||
|
||||
class SOP2Op(IntEnum):
|
||||
S_ADD_U32 = 0
|
||||
@@ -616,7 +564,6 @@ class SOP2Op(IntEnum):
|
||||
S_PACK_LL_B32_B16 = 50
|
||||
S_PACK_LH_B32_B16 = 51
|
||||
S_PACK_HH_B32_B16 = 52
|
||||
CDNA4 = 600
|
||||
|
||||
class SOPCOp(IntEnum):
|
||||
S_CMP_EQ_I32 = 0
|
||||
@@ -639,7 +586,6 @@ class SOPCOp(IntEnum):
|
||||
S_SET_GPR_IDX_ON = 17
|
||||
S_CMP_EQ_U64 = 18
|
||||
S_CMP_LG_U64 = 19
|
||||
CDNA4 = 600
|
||||
|
||||
class SOPKOp(IntEnum):
|
||||
S_MOVK_I32 = 0
|
||||
@@ -695,7 +641,6 @@ class SOPPOp(IntEnum):
|
||||
S_ENDPGM_SAVED = 27
|
||||
S_SET_GPR_IDX_OFF = 28
|
||||
S_SET_GPR_IDX_MODE = 29
|
||||
CDNA4 = 600
|
||||
|
||||
class VOP1Op(IntEnum):
|
||||
V_NOP = 0
|
||||
@@ -783,7 +728,6 @@ class VOP1Op(IntEnum):
|
||||
V_PERMLANE16_SWAP_B32 = 89
|
||||
V_PERMLANE32_SWAP_B32 = 90
|
||||
V_CVT_F32_BF16 = 91
|
||||
CDNA4 = 600
|
||||
|
||||
class VOP2Op(IntEnum):
|
||||
V_CNDMASK_B32 = 0
|
||||
@@ -848,7 +792,6 @@ class VOP2Op(IntEnum):
|
||||
V_FMAC_F32 = 59
|
||||
V_PK_FMAC_F16 = 60
|
||||
V_XNOR_B32 = 61
|
||||
CDNA4 = 600
|
||||
|
||||
class VOP3AOp(IntEnum):
|
||||
V_CMP_CLASS_F32 = 16
|
||||
@@ -1268,7 +1211,7 @@ class VOP3AOp(IntEnum):
|
||||
V_CVT_SCALEF32_SR_PK32_BF6_F32 = 597
|
||||
V_CVT_SCALEF32_PK32_F32_FP6 = 598
|
||||
V_CVT_SCALEF32_PK32_F32_BF6 = 599
|
||||
CDNA4 = 600
|
||||
V_CVT_SCALEF32_PK32_FP6_F16 = 600
|
||||
V_CVT_SCALEF32_PK32_FP6_BF16 = 601
|
||||
V_CVT_SCALEF32_PK32_BF6_F16 = 602
|
||||
V_CVT_SCALEF32_PK32_BF6_BF16 = 603
|
||||
@@ -1338,7 +1281,6 @@ class VOP3BOp(IntEnum):
|
||||
V_DIV_SCALE_F64 = 481
|
||||
V_MAD_U64_U32 = 488
|
||||
V_MAD_I64_I32 = 489
|
||||
CDNA4 = 600
|
||||
|
||||
class VOP3POp(IntEnum):
|
||||
V_PK_MAD_I16 = 0
|
||||
@@ -1388,8 +1330,6 @@ class VOP3POp(IntEnum):
|
||||
V_SMFMAC_F32_16X16X128_BF8_BF8 = 59
|
||||
V_SMFMAC_F32_16X16X128_BF8_FP8 = 60
|
||||
V_SMFMAC_F32_16X16X128_FP8_BF8 = 61
|
||||
V_MFMA_F32_16X16X8_XF32 = 62
|
||||
V_MFMA_F32_32X32X4_XF32 = 63
|
||||
V_MFMA_F32_32X32X1_2B_F32 = 64
|
||||
V_MFMA_F32_16X16X1_4B_F32 = 65
|
||||
V_MFMA_F32_4X4X1_16B_F32 = 66
|
||||
@@ -1447,7 +1387,6 @@ class VOP3POp(IntEnum):
|
||||
V_SMFMAC_F32_32X32X32_BF8_FP8 = 125
|
||||
V_SMFMAC_F32_32X32X32_FP8_BF8 = 126
|
||||
V_SMFMAC_F32_32X32X32_FP8_FP8 = 127
|
||||
CDNA4 = 600
|
||||
|
||||
class VOPCOp(IntEnum):
|
||||
V_CMP_CLASS_F32 = 16
|
||||
@@ -1648,4 +1587,3 @@ class VOPCOp(IntEnum):
|
||||
V_CMPX_NE_U64 = 253
|
||||
V_CMPX_GE_U64 = 254
|
||||
V_CMPX_T_U64 = 255
|
||||
CDNA4 = 600
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
# autogenerated from AMD CDNA3+CDNA4 ISA PDF by pdf.py - do not edit
|
||||
# autogenerated from AMD ISA PDF by pdf.py - do not edit
|
||||
# ruff: noqa: F401,F403
|
||||
from typing import Annotated
|
||||
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, Inst96, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
|
||||
from extra.assembly.amd.dsl import *
|
||||
from extra.assembly.amd.autogen.cdna.enum import *
|
||||
import functools
|
||||
|
||||
# instruction formats
|
||||
class DPP(Inst64):
|
||||
class DPP(Inst):
|
||||
encoding = bits[8:0] == 0b11111010
|
||||
vop_op = bits[16:9]
|
||||
vdst:VGPRField = bits[24:17]
|
||||
vop2_op = bits[31:25]
|
||||
src0:Src = bits[39:32]
|
||||
vop_op = bits[16:9]
|
||||
vop2_op = bits[31:25]
|
||||
dpp_ctrl = bits[48:40]
|
||||
bound_ctrl = bits[51]
|
||||
bc = bits[51]
|
||||
src0_neg = bits[52]
|
||||
src0_abs = bits[53]
|
||||
src1_neg = bits[54]
|
||||
@@ -21,7 +20,7 @@ class DPP(Inst64):
|
||||
bank_mask = bits[59:56]
|
||||
row_mask = bits[63:60]
|
||||
|
||||
class DS(Inst64):
|
||||
class DS(Inst):
|
||||
encoding = bits[31:26] == 0b110110
|
||||
op:Annotated[BitField, DSOp] = bits[24:17]
|
||||
vdst:VGPRField = bits[63:56]
|
||||
@@ -33,7 +32,7 @@ class DS(Inst64):
|
||||
gds = bits[16]
|
||||
acc = bits[25]
|
||||
|
||||
class FLAT(Inst64):
|
||||
class FLAT(Inst):
|
||||
encoding = bits[31:26] == 0b110111
|
||||
op:Annotated[BitField, FLATOp] = bits[24:18]
|
||||
vdst:VGPRField = bits[63:56]
|
||||
@@ -48,7 +47,7 @@ class FLAT(Inst64):
|
||||
sc1 = bits[25]
|
||||
acc = bits[55]
|
||||
|
||||
class MTBUF(Inst64):
|
||||
class MTBUF(Inst):
|
||||
encoding = bits[31:26] == 0b111010
|
||||
op:Annotated[BitField, MTBUFOp] = bits[18:15]
|
||||
vdata:VGPRField = bits[47:40]
|
||||
@@ -56,15 +55,16 @@ class MTBUF(Inst64):
|
||||
srsrc:SGPRField = bits[52:48]
|
||||
soffset:SSrc = bits[63:56]
|
||||
offset:Imm = bits[11:0]
|
||||
format = bits[25:19]
|
||||
offen = bits[12]
|
||||
idxen = bits[13]
|
||||
sc0 = bits[14]
|
||||
dfmt = bits[22:19]
|
||||
nfmt = bits[25:23]
|
||||
sc1 = bits[53]
|
||||
nt = bits[54]
|
||||
acc = bits[55]
|
||||
sc0 = bits[14]
|
||||
|
||||
class MUBUF(Inst64):
|
||||
class MUBUF(Inst):
|
||||
encoding = bits[31:26] == 0b111000
|
||||
op:Annotated[BitField, MUBUFOp] = bits[24:18]
|
||||
vdata:VGPRField = bits[47:40]
|
||||
@@ -80,16 +80,16 @@ class MUBUF(Inst64):
|
||||
nt = bits[17]
|
||||
acc = bits[55]
|
||||
|
||||
class SDWA(Inst64):
|
||||
class SDWA(Inst):
|
||||
encoding = bits[8:0] == 0b11111001
|
||||
vop_op = bits[16:9]
|
||||
vdst:VGPRField = bits[24:17]
|
||||
src0:Src = bits[39:32]
|
||||
omod = bits[47:46]
|
||||
clmp = bits[45]
|
||||
vop_op = bits[16:9]
|
||||
vop2_op = bits[31:25]
|
||||
src0:Src = bits[39:32]
|
||||
dst_sel = bits[42:40]
|
||||
dst_u = bits[44:43]
|
||||
clmp = bits[45]
|
||||
omod = bits[47:46]
|
||||
src0_sel = bits[50:48]
|
||||
src0_sext = bits[51]
|
||||
src0_neg = bits[52]
|
||||
@@ -101,12 +101,10 @@ class SDWA(Inst64):
|
||||
src1_abs = bits[61]
|
||||
s1 = bits[63]
|
||||
|
||||
class SDWAB(Inst64):
|
||||
class SDWAB(Inst):
|
||||
sdst:SGPRField = bits[46:40]
|
||||
src0:Src = bits[39:32]
|
||||
dst_sel = bits[42:40]
|
||||
dst_u = bits[44:43]
|
||||
clmp = bits[45]
|
||||
omod = bits[47:46]
|
||||
sd = bits[47]
|
||||
src0_sel = bits[50:48]
|
||||
src0_sext = bits[51]
|
||||
src0_neg = bits[52]
|
||||
@@ -118,7 +116,7 @@ class SDWAB(Inst64):
|
||||
src1_abs = bits[61]
|
||||
s1 = bits[63]
|
||||
|
||||
class SMEM(Inst64):
|
||||
class SMEM(Inst):
|
||||
encoding = bits[31:26] == 0b110000
|
||||
op:Annotated[BitField, SMEMOp] = bits[25:18]
|
||||
sdata:SGPRField = bits[12:6]
|
||||
@@ -128,79 +126,78 @@ class SMEM(Inst64):
|
||||
glc = bits[16]
|
||||
soe = bits[14]
|
||||
nv = bits[15]
|
||||
imm = bits[17]
|
||||
imm:Imm = bits[17]
|
||||
|
||||
class SOP1(Inst32):
|
||||
class SOP1(Inst):
|
||||
encoding = bits[31:23] == 0b101111101
|
||||
op:Annotated[BitField, SOP1Op] = bits[15:8]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
|
||||
class SOP2(Inst32):
|
||||
class SOP2(Inst):
|
||||
encoding = bits[31:30] == 0b10
|
||||
op:Annotated[BitField, SOP2Op] = bits[29:23]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
ssrc1:SSrc = bits[15:8]
|
||||
|
||||
class SOPC(Inst32):
|
||||
class SOPC(Inst):
|
||||
encoding = bits[31:23] == 0b101111110
|
||||
op:Annotated[BitField, SOPCOp] = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
ssrc1:SSrc = bits[15:8]
|
||||
|
||||
class SOPK(Inst32):
|
||||
class SOPK(Inst):
|
||||
encoding = bits[31:28] == 0b1011
|
||||
op:Annotated[BitField, SOPKOp] = bits[27:23]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
simm16:SImm = bits[15:0]
|
||||
|
||||
class SOPP(Inst32):
|
||||
class SOPP(Inst):
|
||||
encoding = bits[31:23] == 0b101111111
|
||||
op:Annotated[BitField, SOPPOp] = bits[22:16]
|
||||
simm16:SImm = bits[15:0]
|
||||
|
||||
class VOP1(Inst32):
|
||||
encoding = bits[31:25] == 0b111111
|
||||
class VOP1(Inst):
|
||||
encoding = bits[31:25] == 0b0111111
|
||||
op:Annotated[BitField, VOP1Op] = bits[16:9]
|
||||
vdst:VGPRField = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
|
||||
class VOP2(Inst32):
|
||||
encoding = bits[31] == 0
|
||||
class VOP2(Inst):
|
||||
encoding = bits[31] == 0b0
|
||||
op:Annotated[BitField, VOP2Op] = bits[30:25]
|
||||
vdst:VGPRField = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
vsrc1:VGPRField = bits[16:9]
|
||||
|
||||
class VOP3A(Inst64):
|
||||
class VOP3A(Inst):
|
||||
encoding = bits[31:26] == 0b110100
|
||||
vdst:VGPRField = bits[7:0]
|
||||
abs = bits[10:8]
|
||||
opsel = bits[14:11]
|
||||
clmp = bits[15]
|
||||
op:Annotated[BitField, VOP3AOp] = bits[25:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
src0:Src = bits[40:32]
|
||||
src1:Src = bits[49:41]
|
||||
src2:Src = bits[58:50]
|
||||
omod = bits[60:59]
|
||||
neg = bits[63:61]
|
||||
abs = bits[10:8]
|
||||
clmp = bits[15]
|
||||
opsel = bits[14:11]
|
||||
|
||||
class VOP3B(Inst64):
|
||||
class VOP3B(Inst):
|
||||
encoding = bits[31:26] == 0b110100
|
||||
op:Annotated[BitField, VOP3BOp] = bits[25:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
sdst:SGPRField = bits[14:8]
|
||||
clmp = bits[15]
|
||||
op:Annotated[BitField, VOP3BOp] = bits[25:16]
|
||||
src0:Src = bits[40:32]
|
||||
src1:Src = bits[49:41]
|
||||
src2:Src = bits[58:50]
|
||||
omod = bits[60:59]
|
||||
neg = bits[63:61]
|
||||
clmp = bits[15]
|
||||
|
||||
class VOP3P(Inst64):
|
||||
class VOP3P(Inst):
|
||||
encoding = bits[31:23] == 0b110100111
|
||||
_defaults = {'opsel_hi': 3, 'opsel_hi2': 1}
|
||||
op:Annotated[BitField, VOP3POp] = bits[22:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
src0:Src = bits[40:32]
|
||||
@@ -208,13 +205,13 @@ class VOP3P(Inst64):
|
||||
src2:Src = bits[58:50]
|
||||
neg = bits[63:61]
|
||||
neg_hi = bits[10:8]
|
||||
clmp = bits[15]
|
||||
opsel = bits[13:11]
|
||||
opsel_hi = bits[60:59]
|
||||
clmp = bits[15]
|
||||
opsel_hi2 = bits[14]
|
||||
|
||||
class VOPC(Inst32):
|
||||
encoding = bits[31:25] == 0b111110
|
||||
class VOPC(Inst):
|
||||
encoding = bits[31:25] == 0b0111110
|
||||
op:Annotated[BitField, VOPCOp] = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
vsrc1:VGPRField = bits[16:9]
|
||||
@@ -333,12 +330,6 @@ ds_read2_b64 = functools.partial(DS, DSOp.DS_READ2_B64)
|
||||
ds_read2st64_b64 = functools.partial(DS, DSOp.DS_READ2ST64_B64)
|
||||
ds_add_rtn_f64 = functools.partial(DS, DSOp.DS_ADD_RTN_F64)
|
||||
ds_condxchg32_rtn_b64 = functools.partial(DS, DSOp.DS_CONDXCHG32_RTN_B64)
|
||||
ds_gws_sema_release_all = functools.partial(DS, DSOp.DS_GWS_SEMA_RELEASE_ALL)
|
||||
ds_gws_init = functools.partial(DS, DSOp.DS_GWS_INIT)
|
||||
ds_gws_sema_v = functools.partial(DS, DSOp.DS_GWS_SEMA_V)
|
||||
ds_gws_sema_br = functools.partial(DS, DSOp.DS_GWS_SEMA_BR)
|
||||
ds_gws_sema_p = functools.partial(DS, DSOp.DS_GWS_SEMA_P)
|
||||
ds_gws_barrier = functools.partial(DS, DSOp.DS_GWS_BARRIER)
|
||||
ds_read_addtid_b32 = functools.partial(DS, DSOp.DS_READ_ADDTID_B32)
|
||||
ds_pk_add_rtn_f16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_F16)
|
||||
ds_pk_add_rtn_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_BF16)
|
||||
@@ -352,7 +343,6 @@ ds_read_b64_tr_b8 = functools.partial(DS, DSOp.DS_READ_B64_TR_B8)
|
||||
ds_read_b64_tr_b16 = functools.partial(DS, DSOp.DS_READ_B64_TR_B16)
|
||||
ds_read_b96 = functools.partial(DS, DSOp.DS_READ_B96)
|
||||
ds_read_b128 = functools.partial(DS, DSOp.DS_READ_B128)
|
||||
cdna4 = functools.partial(DS, DSOp.CDNA4)
|
||||
flat_load_ubyte = functools.partial(FLAT, FLATOp.FLAT_LOAD_UBYTE)
|
||||
flat_load_sbyte = functools.partial(FLAT, FLATOp.FLAT_LOAD_SBYTE)
|
||||
flat_load_ushort = functools.partial(FLAT, FLATOp.FLAT_LOAD_USHORT)
|
||||
@@ -407,7 +397,6 @@ flat_atomic_or_x2 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_OR_X2)
|
||||
flat_atomic_xor_x2 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_XOR_X2)
|
||||
flat_atomic_inc_x2 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_INC_X2)
|
||||
flat_atomic_dec_x2 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_DEC_X2)
|
||||
cdna4 = functools.partial(FLAT, FLATOp.CDNA4)
|
||||
global_load_ubyte = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_UBYTE, seg=2)
|
||||
global_load_sbyte = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_SBYTE, seg=2)
|
||||
global_load_ushort = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_USHORT, seg=2)
|
||||
@@ -469,7 +458,6 @@ global_atomic_inc_x2 = functools.partial(FLAT, GLOBALOp.GLOBAL_ATOMIC_INC_X2, se
|
||||
global_atomic_dec_x2 = functools.partial(FLAT, GLOBALOp.GLOBAL_ATOMIC_DEC_X2, seg=2)
|
||||
global_load_lds_dwordx4 = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_LDS_DWORDX4, seg=2)
|
||||
global_load_lds_dwordx3 = functools.partial(FLAT, GLOBALOp.GLOBAL_LOAD_LDS_DWORDX3, seg=2)
|
||||
cdna4 = functools.partial(FLAT, GLOBALOp.CDNA4, seg=2)
|
||||
tbuffer_load_format_x = functools.partial(MTBUF, MTBUFOp.TBUFFER_LOAD_FORMAT_X)
|
||||
tbuffer_load_format_xy = functools.partial(MTBUF, MTBUFOp.TBUFFER_LOAD_FORMAT_XY)
|
||||
tbuffer_load_format_xyz = functools.partial(MTBUF, MTBUFOp.TBUFFER_LOAD_FORMAT_XYZ)
|
||||
@@ -560,7 +548,6 @@ buffer_atomic_or_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_OR_X2)
|
||||
buffer_atomic_xor_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_XOR_X2)
|
||||
buffer_atomic_inc_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_INC_X2)
|
||||
buffer_atomic_dec_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_DEC_X2)
|
||||
cdna4 = functools.partial(MUBUF, MUBUFOp.CDNA4)
|
||||
scratch_load_ubyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_UBYTE, seg=1)
|
||||
scratch_load_sbyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SBYTE, seg=1)
|
||||
scratch_load_ushort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_USHORT, seg=1)
|
||||
@@ -670,7 +657,6 @@ s_atomic_or_x2 = functools.partial(SMEM, SMEMOp.S_ATOMIC_OR_X2)
|
||||
s_atomic_xor_x2 = functools.partial(SMEM, SMEMOp.S_ATOMIC_XOR_X2)
|
||||
s_atomic_inc_x2 = functools.partial(SMEM, SMEMOp.S_ATOMIC_INC_X2)
|
||||
s_atomic_dec_x2 = functools.partial(SMEM, SMEMOp.S_ATOMIC_DEC_X2)
|
||||
cdna4 = functools.partial(SMEM, SMEMOp.CDNA4)
|
||||
s_mov_b32 = functools.partial(SOP1, SOP1Op.S_MOV_B32)
|
||||
s_mov_b64 = functools.partial(SOP1, SOP1Op.S_MOV_B64)
|
||||
s_cmov_b32 = functools.partial(SOP1, SOP1Op.S_CMOV_B32)
|
||||
@@ -725,7 +711,6 @@ s_orn1_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_ORN1_SAVEEXEC_B64)
|
||||
s_andn1_wrexec_b64 = functools.partial(SOP1, SOP1Op.S_ANDN1_WREXEC_B64)
|
||||
s_andn2_wrexec_b64 = functools.partial(SOP1, SOP1Op.S_ANDN2_WREXEC_B64)
|
||||
s_bitreplicate_b64_b32 = functools.partial(SOP1, SOP1Op.S_BITREPLICATE_B64_B32)
|
||||
cdna4 = functools.partial(SOP1, SOP1Op.CDNA4)
|
||||
s_add_u32 = functools.partial(SOP2, SOP2Op.S_ADD_U32)
|
||||
s_sub_u32 = functools.partial(SOP2, SOP2Op.S_SUB_U32)
|
||||
s_add_i32 = functools.partial(SOP2, SOP2Op.S_ADD_I32)
|
||||
@@ -778,7 +763,6 @@ s_lshl4_add_u32 = functools.partial(SOP2, SOP2Op.S_LSHL4_ADD_U32)
|
||||
s_pack_ll_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_LL_B32_B16)
|
||||
s_pack_lh_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_LH_B32_B16)
|
||||
s_pack_hh_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_HH_B32_B16)
|
||||
cdna4 = functools.partial(SOP2, SOP2Op.CDNA4)
|
||||
s_cmp_eq_i32 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_I32)
|
||||
s_cmp_lg_i32 = functools.partial(SOPC, SOPCOp.S_CMP_LG_I32)
|
||||
s_cmp_gt_i32 = functools.partial(SOPC, SOPCOp.S_CMP_GT_I32)
|
||||
@@ -799,7 +783,6 @@ s_setvskip = functools.partial(SOPC, SOPCOp.S_SETVSKIP)
|
||||
s_set_gpr_idx_on = functools.partial(SOPC, SOPCOp.S_SET_GPR_IDX_ON)
|
||||
s_cmp_eq_u64 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_U64)
|
||||
s_cmp_lg_u64 = functools.partial(SOPC, SOPCOp.S_CMP_LG_U64)
|
||||
cdna4 = functools.partial(SOPC, SOPCOp.CDNA4)
|
||||
s_movk_i32 = functools.partial(SOPK, SOPKOp.S_MOVK_I32)
|
||||
s_cmovk_i32 = functools.partial(SOPK, SOPKOp.S_CMOVK_I32)
|
||||
s_cmpk_eq_i32 = functools.partial(SOPK, SOPKOp.S_CMPK_EQ_I32)
|
||||
@@ -851,7 +834,6 @@ s_cbranch_cdbgsys_and_user = functools.partial(SOPP, SOPPOp.S_CBRANCH_CDBGSYS_AN
|
||||
s_endpgm_saved = functools.partial(SOPP, SOPPOp.S_ENDPGM_SAVED)
|
||||
s_set_gpr_idx_off = functools.partial(SOPP, SOPPOp.S_SET_GPR_IDX_OFF)
|
||||
s_set_gpr_idx_mode = functools.partial(SOPP, SOPPOp.S_SET_GPR_IDX_MODE)
|
||||
cdna4 = functools.partial(SOPP, SOPPOp.CDNA4)
|
||||
v_nop_e32 = functools.partial(VOP1, VOP1Op.V_NOP)
|
||||
v_mov_b32_e32 = functools.partial(VOP1, VOP1Op.V_MOV_B32)
|
||||
v_readfirstlane_b32_e32 = functools.partial(VOP1, VOP1Op.V_READFIRSTLANE_B32)
|
||||
@@ -937,7 +919,6 @@ v_prng_b32_e32 = functools.partial(VOP1, VOP1Op.V_PRNG_B32)
|
||||
v_permlane16_swap_b32_e32 = functools.partial(VOP1, VOP1Op.V_PERMLANE16_SWAP_B32)
|
||||
v_permlane32_swap_b32_e32 = functools.partial(VOP1, VOP1Op.V_PERMLANE32_SWAP_B32)
|
||||
v_cvt_f32_bf16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_BF16)
|
||||
cdna4_e32 = functools.partial(VOP1, VOP1Op.CDNA4)
|
||||
v_cndmask_b32_e32 = functools.partial(VOP2, VOP2Op.V_CNDMASK_B32)
|
||||
v_add_f32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_F32)
|
||||
v_sub_f32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_F32)
|
||||
@@ -961,8 +942,8 @@ v_and_b32_e32 = functools.partial(VOP2, VOP2Op.V_AND_B32)
|
||||
v_or_b32_e32 = functools.partial(VOP2, VOP2Op.V_OR_B32)
|
||||
v_xor_b32_e32 = functools.partial(VOP2, VOP2Op.V_XOR_B32)
|
||||
v_dot2c_f32_bf16_e32 = functools.partial(VOP2, VOP2Op.V_DOT2C_F32_BF16)
|
||||
def v_fmamk_f32_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F32, vdst, src0, vsrc1, literal=K)
|
||||
def v_fmaak_f32_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F32, vdst, src0, vsrc1, literal=K)
|
||||
v_fmamk_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F32)
|
||||
v_fmaak_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F32)
|
||||
v_add_co_u32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_CO_U32)
|
||||
v_sub_co_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_CO_U32)
|
||||
v_subrev_co_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_CO_U32)
|
||||
@@ -1000,7 +981,6 @@ v_dot8c_i32_i4_e32 = functools.partial(VOP2, VOP2Op.V_DOT8C_I32_I4)
|
||||
v_fmac_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F32)
|
||||
v_pk_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_PK_FMAC_F16)
|
||||
v_xnor_b32_e32 = functools.partial(VOP2, VOP2Op.V_XNOR_B32)
|
||||
cdna4_e32 = functools.partial(VOP2, VOP2Op.CDNA4)
|
||||
v_cmp_class_f32 = functools.partial(VOP3A, VOP3AOp.V_CMP_CLASS_F32)
|
||||
v_cmpx_class_f32 = functools.partial(VOP3A, VOP3AOp.V_CMPX_CLASS_F32)
|
||||
v_cmp_class_f64 = functools.partial(VOP3A, VOP3AOp.V_CMP_CLASS_F64)
|
||||
@@ -1418,7 +1398,7 @@ v_cvt_scalef32_sr_pk32_fp6_f32 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32
|
||||
v_cvt_scalef32_sr_pk32_bf6_f32 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_F32)
|
||||
v_cvt_scalef32_pk32_f32_fp6 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_F32_FP6)
|
||||
v_cvt_scalef32_pk32_f32_bf6 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_F32_BF6)
|
||||
cdna4 = functools.partial(VOP3A, VOP3AOp.CDNA4)
|
||||
v_cvt_scalef32_pk32_fp6_f16 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_FP6_F16)
|
||||
v_cvt_scalef32_pk32_fp6_bf16 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_FP6_BF16)
|
||||
v_cvt_scalef32_pk32_bf6_f16 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_BF6_F16)
|
||||
v_cvt_scalef32_pk32_bf6_bf16 = functools.partial(VOP3A, VOP3AOp.V_CVT_SCALEF32_PK32_BF6_BF16)
|
||||
@@ -1486,7 +1466,6 @@ v_div_scale_f32 = functools.partial(VOP3B, VOP3BOp.V_DIV_SCALE_F32)
|
||||
v_div_scale_f64 = functools.partial(VOP3B, VOP3BOp.V_DIV_SCALE_F64)
|
||||
v_mad_u64_u32 = functools.partial(VOP3B, VOP3BOp.V_MAD_U64_U32)
|
||||
v_mad_i64_i32 = functools.partial(VOP3B, VOP3BOp.V_MAD_I64_I32)
|
||||
cdna4 = functools.partial(VOP3B, VOP3BOp.CDNA4)
|
||||
v_pk_mad_i16 = functools.partial(VOP3P, VOP3POp.V_PK_MAD_I16)
|
||||
v_pk_mul_lo_u16 = functools.partial(VOP3P, VOP3POp.V_PK_MUL_LO_U16)
|
||||
v_pk_add_i16 = functools.partial(VOP3P, VOP3POp.V_PK_ADD_I16)
|
||||
@@ -1534,8 +1513,6 @@ v_smfmac_i32_16x16x128_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_16X16X
|
||||
v_smfmac_f32_16x16x128_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8)
|
||||
v_smfmac_f32_16x16x128_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8)
|
||||
v_smfmac_f32_16x16x128_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8)
|
||||
v_mfma_f32_16x16x8_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X8_XF32)
|
||||
v_mfma_f32_32x32x4_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X4_XF32)
|
||||
v_mfma_f32_32x32x1_2b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X1_2B_F32)
|
||||
v_mfma_f32_16x16x1_4b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X1_4B_F32)
|
||||
v_mfma_f32_4x4x1_16b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_4X4X1_16B_F32)
|
||||
@@ -1593,7 +1570,6 @@ v_smfmac_f32_32x32x32_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32
|
||||
v_smfmac_f32_32x32x32_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_FP8)
|
||||
v_smfmac_f32_32x32x32_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_BF8)
|
||||
v_smfmac_f32_32x32x32_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_FP8)
|
||||
cdna4 = functools.partial(VOP3P, VOP3POp.CDNA4)
|
||||
v_cmp_class_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_CLASS_F32)
|
||||
v_cmpx_class_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_CLASS_F32)
|
||||
v_cmp_class_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_CLASS_F64)
|
||||
@@ -1791,42 +1767,4 @@ v_cmpx_le_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_U64)
|
||||
v_cmpx_gt_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_U64)
|
||||
v_cmpx_ne_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NE_U64)
|
||||
v_cmpx_ge_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_U64)
|
||||
v_cmpx_t_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_T_U64)
|
||||
cdna4_e32 = functools.partial(VOPC, VOPCOp.CDNA4)
|
||||
|
||||
S_ADD_U32 = SrcEnum.S_ADD_U32
|
||||
S_SUB_U32 = SrcEnum.S_SUB_U32
|
||||
S_ADD_I32 = SrcEnum.S_ADD_I32
|
||||
S_SUB_I32 = SrcEnum.S_SUB_I32
|
||||
S_ADDC_U32 = SrcEnum.S_ADDC_U32
|
||||
S_SUBB_U32 = SrcEnum.S_SUBB_U32
|
||||
S_MIN_I32 = SrcEnum.S_MIN_I32
|
||||
FLAT_SCRATCH_LO = SrcEnum.FLAT_SCRATCH_LO
|
||||
FLAT_SCRATCH_HI = SrcEnum.FLAT_SCRATCH_HI
|
||||
XNACK_MASK_LO = SrcEnum.XNACK_MASK_LO
|
||||
XNACK_MASK_HI = SrcEnum.XNACK_MASK_HI
|
||||
VCC_LO = SrcEnum.VCC_LO
|
||||
VCC_HI = SrcEnum.VCC_HI
|
||||
M0 = SrcEnum.M0
|
||||
EXEC_LO = SrcEnum.EXEC_LO
|
||||
EXEC_HI = SrcEnum.EXEC_HI
|
||||
ZERO = SrcEnum.ZERO
|
||||
DPP8FI = SrcEnum.DPP8FI
|
||||
SHARED_BASE = SrcEnum.SHARED_BASE
|
||||
SHARED_LIMIT = SrcEnum.SHARED_LIMIT
|
||||
PRIVATE_BASE = SrcEnum.PRIVATE_BASE
|
||||
PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
|
||||
RESERVED = SrcEnum.RESERVED
|
||||
POS_HALF = SrcEnum.POS_HALF
|
||||
NEG_HALF = SrcEnum.NEG_HALF
|
||||
POS_ONE = SrcEnum.POS_ONE
|
||||
NEG_ONE = SrcEnum.NEG_ONE
|
||||
POS_TWO = SrcEnum.POS_TWO
|
||||
NEG_TWO = SrcEnum.NEG_TWO
|
||||
POS_FOUR = SrcEnum.POS_FOUR
|
||||
NEG_FOUR = SrcEnum.NEG_FOUR
|
||||
INV_2PI = SrcEnum.INV_2PI
|
||||
VCCZ = SrcEnum.VCCZ
|
||||
EXECZ = SrcEnum.EXECZ
|
||||
SCC = SrcEnum.SCC
|
||||
LDS_DIRECT = SrcEnum.LDS_DIRECT
|
||||
v_cmpx_t_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_T_U64)
|
||||
File diff suppressed because one or more lines are too long
@@ -1,34 +1,97 @@
|
||||
# autogenerated from AMD RDNA3.5 ISA PDF by pdf.py - do not edit
|
||||
# autogenerated from AMD ISA PDF by pdf.py - do not edit
|
||||
from enum import IntEnum
|
||||
|
||||
class SrcEnum(IntEnum):
|
||||
VCC_LO = 106
|
||||
VCC_HI = 107
|
||||
NULL = 124
|
||||
M0 = 125
|
||||
EXEC_LO = 126
|
||||
EXEC_HI = 127
|
||||
ZERO = 128
|
||||
DPP8 = 233
|
||||
DPP8FI = 234
|
||||
SHARED_BASE = 235
|
||||
SHARED_LIMIT = 236
|
||||
PRIVATE_BASE = 237
|
||||
PRIVATE_LIMIT = 238
|
||||
POS_HALF = 240
|
||||
NEG_HALF = 241
|
||||
POS_ONE = 242
|
||||
NEG_ONE = 243
|
||||
POS_TWO = 244
|
||||
NEG_TWO = 245
|
||||
POS_FOUR = 246
|
||||
NEG_FOUR = 247
|
||||
INV_2PI = 248
|
||||
DPP16 = 250
|
||||
VCCZ = 251
|
||||
EXECZ = 252
|
||||
SCC = 253
|
||||
LDS_DIRECT = 254
|
||||
class BufFmt(IntEnum):
|
||||
BUF_FMT_8_UNORM = 1
|
||||
BUF_FMT_8_SNORM = 2
|
||||
BUF_FMT_8_USCALED = 3
|
||||
BUF_FMT_8_SSCALED = 4
|
||||
BUF_FMT_8_UINT = 5
|
||||
BUF_FMT_8_SINT = 6
|
||||
BUF_FMT_16_UNORM = 7
|
||||
BUF_FMT_16_SNORM = 8
|
||||
BUF_FMT_16_USCALED = 9
|
||||
BUF_FMT_16_SSCALED = 10
|
||||
BUF_FMT_16_UINT = 11
|
||||
BUF_FMT_16_SINT = 12
|
||||
BUF_FMT_16_FLOAT = 13
|
||||
BUF_FMT_8_8_UNORM = 14
|
||||
BUF_FMT_8_8_SNORM = 15
|
||||
BUF_FMT_8_8_USCALED = 16
|
||||
BUF_FMT_8_8_SSCALED = 17
|
||||
BUF_FMT_8_8_UINT = 18
|
||||
BUF_FMT_8_8_SINT = 19
|
||||
BUF_FMT_32_UINT = 20
|
||||
BUF_FMT_32_SINT = 21
|
||||
BUF_FMT_32_FLOAT = 22
|
||||
BUF_FMT_16_16_UNORM = 23
|
||||
BUF_FMT_16_16_SNORM = 24
|
||||
BUF_FMT_16_16_USCALED = 25
|
||||
BUF_FMT_16_16_SSCALED = 26
|
||||
BUF_FMT_16_16_UINT = 27
|
||||
BUF_FMT_16_16_SINT = 28
|
||||
BUF_FMT_16_16_FLOAT = 29
|
||||
BUF_FMT_10_11_11_FLOAT = 30
|
||||
BUF_FMT_11_11_10_FLOAT = 31
|
||||
BUF_FMT_10_10_10_2_UNORM = 32
|
||||
BUF_FMT_10_10_10_2_SNORM = 33
|
||||
BUF_FMT_10_10_10_2_UINT = 34
|
||||
BUF_FMT_10_10_10_2_SINT = 35
|
||||
BUF_FMT_2_10_10_10_UNORM = 36
|
||||
BUF_FMT_2_10_10_10_SNORM = 37
|
||||
BUF_FMT_2_10_10_10_USCALED = 38
|
||||
BUF_FMT_2_10_10_10_SSCALED = 39
|
||||
BUF_FMT_2_10_10_10_UINT = 40
|
||||
BUF_FMT_2_10_10_10_SINT = 41
|
||||
BUF_FMT_8_8_8_8_UNORM = 42
|
||||
BUF_FMT_8_8_8_8_SNORM = 43
|
||||
BUF_FMT_8_8_8_8_USCALED = 44
|
||||
BUF_FMT_8_8_8_8_SSCALED = 45
|
||||
BUF_FMT_8_8_8_8_UINT = 46
|
||||
BUF_FMT_8_8_8_8_SINT = 47
|
||||
BUF_FMT_32_32_UINT = 48
|
||||
BUF_FMT_32_32_SINT = 49
|
||||
BUF_FMT_32_32_FLOAT = 50
|
||||
BUF_FMT_16_16_16_16_UNORM = 51
|
||||
BUF_FMT_16_16_16_16_SNORM = 52
|
||||
BUF_FMT_16_16_16_16_USCALED = 53
|
||||
BUF_FMT_16_16_16_16_SSCALED = 54
|
||||
BUF_FMT_16_16_16_16_UINT = 55
|
||||
BUF_FMT_16_16_16_16_SINT = 56
|
||||
BUF_FMT_16_16_16_16_FLOAT = 57
|
||||
BUF_FMT_32_32_32_UINT = 58
|
||||
BUF_FMT_32_32_32_SINT = 59
|
||||
BUF_FMT_32_32_32_FLOAT = 60
|
||||
BUF_FMT_32_32_32_32_UINT = 61
|
||||
BUF_FMT_8_SRGB = 64
|
||||
BUF_FMT_8_8_SRGB = 65
|
||||
BUF_FMT_8_8_8_8_SRGB = 66
|
||||
BUF_FMT_5_9_9_9_FLOAT = 67
|
||||
BUF_FMT_5_6_5_UNORM = 68
|
||||
BUF_FMT_1_5_5_5_UNORM = 69
|
||||
BUF_FMT_5_5_5_1_UNORM = 70
|
||||
BUF_FMT_4_4_4_4_UNORM = 71
|
||||
BUF_FMT_4_4_UNORM = 72
|
||||
BUF_FMT_1_UNORM = 73
|
||||
BUF_FMT_1_REVERSED_UNORM = 74
|
||||
BUF_FMT_32_FLOAT_CLAMP = 75
|
||||
BUF_FMT_8_24_UNORM = 76
|
||||
BUF_FMT_8_24_UINT = 77
|
||||
BUF_FMT_24_8_UNORM = 78
|
||||
BUF_FMT_24_8_UINT = 79
|
||||
BUF_FMT_X24_8_32_UINT = 80
|
||||
BUF_FMT_X24_8_32_FLOAT = 81
|
||||
BUF_FMT_GB_GR_UNORM = 82
|
||||
BUF_FMT_GB_GR_SNORM = 83
|
||||
BUF_FMT_GB_GR_UINT = 84
|
||||
BUF_FMT_GB_GR_SRGB = 85
|
||||
BUF_FMT_BG_RG_UNORM = 86
|
||||
BUF_FMT_BG_RG_SNORM = 87
|
||||
BUF_FMT_BG_RG_UINT = 88
|
||||
BUF_FMT_BG_RG_SRGB = 89
|
||||
BUF_FMT_BC1_UNORM = 109
|
||||
BUF_FMT_BC1_SRGB = 110
|
||||
BUF_FMT_BC2_UNORM = 111
|
||||
|
||||
class DSOp(IntEnum):
|
||||
DS_ADD_U32 = 0
|
||||
@@ -1372,7 +1435,6 @@ class VOP3POp(IntEnum):
|
||||
V_WMMA_I32_16X16X16_IU4 = 69
|
||||
|
||||
class VOP3SDOp(IntEnum):
|
||||
DWORD = 1
|
||||
V_ADD_CO_CI_U32 = 288
|
||||
V_SUB_CO_CI_U32 = 289
|
||||
V_SUBREV_CO_CI_U32 = 290
|
||||
@@ -1594,68 +1656,3 @@ class VOPDOp(IntEnum):
|
||||
V_DUAL_ADD_NC_U32 = 16
|
||||
V_DUAL_LSHLREV_B32 = 17
|
||||
V_DUAL_AND_B32 = 18
|
||||
|
||||
class BufFmt(IntEnum):
|
||||
BUF_FMT_8_UNORM = 1
|
||||
BUF_FMT_8_SNORM = 2
|
||||
BUF_FMT_8_USCALED = 3
|
||||
BUF_FMT_8_SSCALED = 4
|
||||
BUF_FMT_8_UINT = 5
|
||||
BUF_FMT_8_SINT = 6
|
||||
BUF_FMT_16_UNORM = 7
|
||||
BUF_FMT_16_SNORM = 8
|
||||
BUF_FMT_16_USCALED = 9
|
||||
BUF_FMT_16_SSCALED = 10
|
||||
BUF_FMT_16_UINT = 11
|
||||
BUF_FMT_16_SINT = 12
|
||||
BUF_FMT_16_FLOAT = 13
|
||||
BUF_FMT_8_8_UNORM = 14
|
||||
BUF_FMT_8_8_SNORM = 15
|
||||
BUF_FMT_8_8_USCALED = 16
|
||||
BUF_FMT_8_8_SSCALED = 17
|
||||
BUF_FMT_8_8_UINT = 18
|
||||
BUF_FMT_8_8_SINT = 19
|
||||
BUF_FMT_32_UINT = 20
|
||||
BUF_FMT_32_SINT = 21
|
||||
BUF_FMT_32_FLOAT = 22
|
||||
BUF_FMT_16_16_UNORM = 23
|
||||
BUF_FMT_16_16_SNORM = 24
|
||||
BUF_FMT_16_16_USCALED = 25
|
||||
BUF_FMT_16_16_SSCALED = 26
|
||||
BUF_FMT_16_16_UINT = 27
|
||||
BUF_FMT_16_16_SINT = 28
|
||||
BUF_FMT_16_16_FLOAT = 29
|
||||
BUF_FMT_10_11_11_FLOAT = 30
|
||||
BUF_FMT_11_11_10_FLOAT = 31
|
||||
BUF_FMT_10_10_10_2_UNORM = 32
|
||||
BUF_FMT_10_10_10_2_SNORM = 33
|
||||
BUF_FMT_10_10_10_2_UINT = 34
|
||||
BUF_FMT_10_10_10_2_SINT = 35
|
||||
BUF_FMT_2_10_10_10_UNORM = 36
|
||||
BUF_FMT_2_10_10_10_SNORM = 37
|
||||
BUF_FMT_2_10_10_10_USCALED = 38
|
||||
BUF_FMT_2_10_10_10_SSCALED = 39
|
||||
BUF_FMT_2_10_10_10_UINT = 40
|
||||
BUF_FMT_2_10_10_10_SINT = 41
|
||||
BUF_FMT_8_8_8_8_UNORM = 42
|
||||
BUF_FMT_8_8_8_8_SNORM = 43
|
||||
BUF_FMT_8_8_8_8_USCALED = 44
|
||||
BUF_FMT_8_8_8_8_SSCALED = 45
|
||||
BUF_FMT_8_8_8_8_UINT = 46
|
||||
BUF_FMT_8_8_8_8_SINT = 47
|
||||
BUF_FMT_32_32_UINT = 48
|
||||
BUF_FMT_32_32_SINT = 49
|
||||
BUF_FMT_32_32_FLOAT = 50
|
||||
BUF_FMT_16_16_16_16_UNORM = 51
|
||||
BUF_FMT_16_16_16_16_SNORM = 52
|
||||
BUF_FMT_16_16_16_16_USCALED = 53
|
||||
BUF_FMT_16_16_16_16_SSCALED = 54
|
||||
BUF_FMT_16_16_16_16_UINT = 55
|
||||
BUF_FMT_16_16_16_16_SINT = 56
|
||||
BUF_FMT_16_16_16_16_FLOAT = 57
|
||||
BUF_FMT_32_32_32_UINT = 58
|
||||
BUF_FMT_32_32_32_SINT = 59
|
||||
BUF_FMT_32_32_32_FLOAT = 60
|
||||
BUF_FMT_32_32_32_32_UINT = 61
|
||||
BUF_FMT_32_32_32_32_SINT = 62
|
||||
BUF_FMT_32_32_32_32_FLOAT = 63
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
# autogenerated from AMD RDNA3.5 ISA PDF by pdf.py - do not edit
|
||||
# autogenerated from AMD ISA PDF by pdf.py - do not edit
|
||||
# ruff: noqa: F401,F403
|
||||
from typing import Annotated
|
||||
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, Inst96, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
|
||||
from extra.assembly.amd.dsl import *
|
||||
from extra.assembly.amd.autogen.rdna3.enum import *
|
||||
import functools
|
||||
|
||||
# instruction formats
|
||||
class DPP16(Inst64):
|
||||
class DPP16(Inst):
|
||||
src0:Src = bits[39:32]
|
||||
dpp_ctrl = bits[48:40]
|
||||
fi = bits[50]
|
||||
@@ -18,7 +17,7 @@ class DPP16(Inst64):
|
||||
bank_mask = bits[59:56]
|
||||
row_mask = bits[63:60]
|
||||
|
||||
class DPP8(Inst64):
|
||||
class DPP8(Inst):
|
||||
src0:Src = bits[39:32]
|
||||
lane_sel0 = bits[42:40]
|
||||
lane_sel1 = bits[45:43]
|
||||
@@ -29,7 +28,7 @@ class DPP8(Inst64):
|
||||
lane_sel6 = bits[60:58]
|
||||
lane_sel7 = bits[63:61]
|
||||
|
||||
class DS(Inst64):
|
||||
class DS(Inst):
|
||||
encoding = bits[31:26] == 0b110110
|
||||
op:Annotated[BitField, DSOp] = bits[25:18]
|
||||
vdst:VGPRField = bits[63:56]
|
||||
@@ -40,18 +39,18 @@ class DS(Inst64):
|
||||
offset1 = bits[15:8]
|
||||
gds = bits[17]
|
||||
|
||||
class EXP(Inst64):
|
||||
class EXP(Inst):
|
||||
encoding = bits[31:26] == 0b111110
|
||||
vsrc0:VGPRField = bits[39:32]
|
||||
vsrc1:VGPRField = bits[47:40]
|
||||
vsrc2:VGPRField = bits[55:48]
|
||||
vsrc3:VGPRField = bits[63:56]
|
||||
en = bits[3:0]
|
||||
target = bits[9:4]
|
||||
vsrc0 = bits[39:32]
|
||||
vsrc1:VGPRField = bits[47:40]
|
||||
vsrc2 = bits[55:48]
|
||||
vsrc3 = bits[63:56]
|
||||
done = bits[11]
|
||||
row = bits[13]
|
||||
|
||||
class FLAT(Inst64):
|
||||
class FLAT(Inst):
|
||||
encoding = bits[31:26] == 0b110111
|
||||
op:Annotated[BitField, FLATOp] = bits[24:18]
|
||||
vdst:VGPRField = bits[63:56]
|
||||
@@ -60,12 +59,12 @@ class FLAT(Inst64):
|
||||
saddr:SSrc = bits[54:48]
|
||||
offset:Imm = bits[12:0]
|
||||
seg = bits[17:16]
|
||||
dlc = bits[13]
|
||||
glc = bits[14]
|
||||
dlc = bits[13]
|
||||
slc = bits[15]
|
||||
sve = bits[55]
|
||||
|
||||
class LDSDIR(Inst32):
|
||||
class LDSDIR(Inst):
|
||||
encoding = bits[31:24] == 0b11001110
|
||||
op = bits[21:20]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
@@ -73,29 +72,29 @@ class LDSDIR(Inst32):
|
||||
attr_chan = bits[9:8]
|
||||
wait_va = bits[19:16]
|
||||
|
||||
class MIMG(Inst64):
|
||||
class MIMG(Inst):
|
||||
encoding = bits[31:26] == 0b111100
|
||||
op:Annotated[BitField, MIMGOp] = bits[25:18]
|
||||
vdata:VGPRField = bits[47:40]
|
||||
vaddr:VGPRField = bits[39:32]
|
||||
srsrc:SGPRField = bits[52:48]
|
||||
ssamp = bits[62:58]
|
||||
ssamp:SGPRField = bits[62:58]
|
||||
dmask = bits[11:8]
|
||||
dim = bits[4:2]
|
||||
unrm = bits[7]
|
||||
dlc = bits[13]
|
||||
glc = bits[14]
|
||||
dlc = bits[13]
|
||||
slc = bits[12]
|
||||
tfe = bits[53]
|
||||
unrm = bits[7]
|
||||
nsa = bits[0]
|
||||
r128 = bits[15]
|
||||
a16 = bits[16]
|
||||
d16 = bits[17]
|
||||
tfe = bits[53]
|
||||
lwe = bits[54]
|
||||
addr1 = bits[71:64]
|
||||
addr2 = bits[79:72]
|
||||
|
||||
class MTBUF(Inst64):
|
||||
class MTBUF(Inst):
|
||||
encoding = bits[31:26] == 0b111010
|
||||
op:Annotated[BitField, MTBUFOp] = bits[18:15]
|
||||
vdata:VGPRField = bits[47:40]
|
||||
@@ -111,7 +110,7 @@ class MTBUF(Inst64):
|
||||
slc = bits[12]
|
||||
tfe = bits[53]
|
||||
|
||||
class MUBUF(Inst64):
|
||||
class MUBUF(Inst):
|
||||
encoding = bits[31:26] == 0b111000
|
||||
op:Annotated[BitField, MUBUFOp] = bits[25:18]
|
||||
vdata:VGPRField = bits[47:40]
|
||||
@@ -126,7 +125,7 @@ class MUBUF(Inst64):
|
||||
slc = bits[12]
|
||||
tfe = bits[53]
|
||||
|
||||
class SMEM(Inst64):
|
||||
class SMEM(Inst):
|
||||
encoding = bits[31:26] == 0b111101
|
||||
op:Annotated[BitField, SMEMOp] = bits[25:18]
|
||||
sdata:SGPRField = bits[12:6]
|
||||
@@ -136,62 +135,63 @@ class SMEM(Inst64):
|
||||
glc = bits[14]
|
||||
dlc = bits[13]
|
||||
|
||||
class SOP1(Inst32):
|
||||
class SOP1(Inst):
|
||||
encoding = bits[31:23] == 0b101111101
|
||||
op:Annotated[BitField, SOP1Op] = bits[15:8]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
|
||||
class SOP2(Inst32):
|
||||
class SOP2(Inst):
|
||||
encoding = bits[31:30] == 0b10
|
||||
op:Annotated[BitField, SOP2Op] = bits[29:23]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
ssrc1:SSrc = bits[15:8]
|
||||
|
||||
class SOPC(Inst32):
|
||||
class SOPC(Inst):
|
||||
encoding = bits[31:23] == 0b101111110
|
||||
op:Annotated[BitField, SOPCOp] = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
ssrc1:SSrc = bits[15:8]
|
||||
|
||||
class SOPK(Inst32):
|
||||
class SOPK(Inst):
|
||||
encoding = bits[31:28] == 0b1011
|
||||
op:Annotated[BitField, SOPKOp] = bits[27:23]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
simm16:SImm = bits[15:0]
|
||||
|
||||
class SOPP(Inst32):
|
||||
class SOPP(Inst):
|
||||
encoding = bits[31:23] == 0b101111111
|
||||
op:Annotated[BitField, SOPPOp] = bits[22:16]
|
||||
simm16:SImm = bits[15:0]
|
||||
|
||||
class VINTERP(Inst64):
|
||||
class VINTERP(Inst):
|
||||
encoding = bits[31:24] == 0b11001101
|
||||
op:Annotated[BitField, VINTERPOp] = bits[22:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
src0:Src = bits[40:32]
|
||||
src0:Src = bits[40:32]
|
||||
src1:Src = bits[49:41]
|
||||
src2:Src = bits[58:50]
|
||||
waitexp = bits[10:8]
|
||||
neg = bits[63:61]
|
||||
clmp = bits[15]
|
||||
opsel = bits[14:11]
|
||||
neg = bits[63:61]
|
||||
waitexp = bits[10:8]
|
||||
|
||||
class VOP1(Inst32):
|
||||
encoding = bits[31:25] == 0b111111
|
||||
class VOP1(Inst):
|
||||
encoding = bits[31:25] == 0b0111111
|
||||
op:Annotated[BitField, VOP1Op] = bits[16:9]
|
||||
vdst:VGPRField = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
|
||||
class VOP2(Inst32):
|
||||
encoding = bits[31] == 0
|
||||
class VOP2(Inst):
|
||||
encoding = bits[31] == 0b0
|
||||
op:Annotated[BitField, VOP2Op] = bits[30:25]
|
||||
vdst:VGPRField = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
vsrc1:VGPRField = bits[16:9]
|
||||
|
||||
class VOP3(Inst64):
|
||||
class VOP3(Inst):
|
||||
encoding = bits[31:26] == 0b110101
|
||||
op:Annotated[BitField, VOP3Op] = bits[25:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
@@ -204,9 +204,8 @@ class VOP3(Inst64):
|
||||
clmp = bits[15]
|
||||
opsel = bits[14:11]
|
||||
|
||||
class VOP3P(Inst64):
|
||||
class VOP3P(Inst):
|
||||
encoding = bits[31:24] == 0b11001100
|
||||
_defaults = {'opsel_hi': 3, 'opsel_hi2': 1}
|
||||
op:Annotated[BitField, VOP3POp] = bits[22:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
src0:Src = bits[40:32]
|
||||
@@ -214,12 +213,12 @@ class VOP3P(Inst64):
|
||||
src2:Src = bits[58:50]
|
||||
neg = bits[63:61]
|
||||
neg_hi = bits[10:8]
|
||||
clmp = bits[15]
|
||||
opsel = bits[13:11]
|
||||
opsel_hi = bits[60:59]
|
||||
clmp = bits[15]
|
||||
opsel_hi2 = bits[14]
|
||||
|
||||
class VOP3SD(Inst64):
|
||||
class VOP3SD(Inst):
|
||||
encoding = bits[31:26] == 0b110101
|
||||
op:Annotated[BitField, VOP3SDOp] = bits[25:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
@@ -227,26 +226,26 @@ class VOP3SD(Inst64):
|
||||
src0:Src = bits[40:32]
|
||||
src1:Src = bits[49:41]
|
||||
src2:Src = bits[58:50]
|
||||
clmp = bits[15]
|
||||
omod = bits[60:59]
|
||||
neg = bits[63:61]
|
||||
clmp = bits[15]
|
||||
|
||||
class VOPC(Inst32):
|
||||
encoding = bits[31:25] == 0b111110
|
||||
class VOPC(Inst):
|
||||
encoding = bits[31:25] == 0b0111110
|
||||
op:Annotated[BitField, VOPCOp] = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
vsrc1:VGPRField = bits[16:9]
|
||||
|
||||
class VOPD(Inst64):
|
||||
class VOPD(Inst):
|
||||
encoding = bits[31:26] == 0b110010
|
||||
opx:Annotated[BitField, VOPDOp] = bits[25:22]
|
||||
opy:Annotated[BitField, VOPDOp] = bits[21:17]
|
||||
vdstx:VGPRField = bits[63:56]
|
||||
vdstx = bits[63:56]
|
||||
vdsty:VDSTYEnc = bits[55:49]
|
||||
srcx0:Src = bits[8:0]
|
||||
vsrcx1:VGPRField = bits[16:9]
|
||||
srcy0:Src = bits[40:32]
|
||||
vsrcy1:VGPRField = bits[48:41]
|
||||
vsrcx1 = bits[16:9]
|
||||
vsrcy1 = bits[48:41]
|
||||
|
||||
# instruction helpers
|
||||
ds_add_u32 = functools.partial(DS, DSOp.DS_ADD_U32)
|
||||
@@ -1077,16 +1076,16 @@ v_add_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_NC_U32)
|
||||
v_sub_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_NC_U32)
|
||||
v_subrev_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_NC_U32)
|
||||
v_fmac_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F32)
|
||||
def v_fmamk_f32_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F32, vdst, src0, vsrc1, literal=K)
|
||||
def v_fmaak_f32_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F32, vdst, src0, vsrc1, literal=K)
|
||||
v_fmamk_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F32)
|
||||
v_fmaak_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F32)
|
||||
v_cvt_pk_rtz_f16_f32_e32 = functools.partial(VOP2, VOP2Op.V_CVT_PK_RTZ_F16_F32)
|
||||
v_add_f16_e32 = functools.partial(VOP2, VOP2Op.V_ADD_F16)
|
||||
v_sub_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUB_F16)
|
||||
v_subrev_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_F16)
|
||||
v_mul_f16_e32 = functools.partial(VOP2, VOP2Op.V_MUL_F16)
|
||||
v_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F16)
|
||||
def v_fmamk_f16_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F16, vdst, src0, vsrc1, literal=K)
|
||||
def v_fmaak_f16_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F16, vdst, src0, vsrc1, literal=K)
|
||||
v_fmamk_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F16)
|
||||
v_fmaak_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F16)
|
||||
v_max_f16_e32 = functools.partial(VOP2, VOP2Op.V_MAX_F16)
|
||||
v_min_f16_e32 = functools.partial(VOP2, VOP2Op.V_MIN_F16)
|
||||
v_ldexp_f16_e32 = functools.partial(VOP2, VOP2Op.V_LDEXP_F16)
|
||||
@@ -1554,7 +1553,6 @@ v_wmma_f16_16x16x16_f16 = functools.partial(VOP3P, VOP3POp.V_WMMA_F16_16X16X16_F
|
||||
v_wmma_bf16_16x16x16_bf16 = functools.partial(VOP3P, VOP3POp.V_WMMA_BF16_16X16X16_BF16)
|
||||
v_wmma_i32_16x16x16_iu8 = functools.partial(VOP3P, VOP3POp.V_WMMA_I32_16X16X16_IU8)
|
||||
v_wmma_i32_16x16x16_iu4 = functools.partial(VOP3P, VOP3POp.V_WMMA_I32_16X16X16_IU4)
|
||||
dword = functools.partial(VOP3SD, VOP3SDOp.DWORD)
|
||||
v_add_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_ADD_CO_CI_U32)
|
||||
v_sub_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUB_CO_CI_U32)
|
||||
v_subrev_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUBREV_CO_CI_U32)
|
||||
@@ -1771,31 +1769,4 @@ v_dual_dot2acc_f32_f16 = functools.partial(VOPD, VOPDOp.V_DUAL_DOT2ACC_F32_F16)
|
||||
v_dual_dot2acc_f32_bf16 = functools.partial(VOPD, VOPDOp.V_DUAL_DOT2ACC_F32_BF16)
|
||||
v_dual_add_nc_u32 = functools.partial(VOPD, VOPDOp.V_DUAL_ADD_NC_U32)
|
||||
v_dual_lshlrev_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_LSHLREV_B32)
|
||||
v_dual_and_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_AND_B32)
|
||||
|
||||
VCC_LO = SrcEnum.VCC_LO
|
||||
VCC_HI = SrcEnum.VCC_HI
|
||||
NULL = SrcEnum.NULL
|
||||
M0 = SrcEnum.M0
|
||||
EXEC_LO = SrcEnum.EXEC_LO
|
||||
EXEC_HI = SrcEnum.EXEC_HI
|
||||
ZERO = SrcEnum.ZERO
|
||||
DPP8FI = SrcEnum.DPP8FI
|
||||
SHARED_BASE = SrcEnum.SHARED_BASE
|
||||
SHARED_LIMIT = SrcEnum.SHARED_LIMIT
|
||||
PRIVATE_BASE = SrcEnum.PRIVATE_BASE
|
||||
PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
|
||||
POS_HALF = SrcEnum.POS_HALF
|
||||
NEG_HALF = SrcEnum.NEG_HALF
|
||||
POS_ONE = SrcEnum.POS_ONE
|
||||
NEG_ONE = SrcEnum.NEG_ONE
|
||||
POS_TWO = SrcEnum.POS_TWO
|
||||
NEG_TWO = SrcEnum.NEG_TWO
|
||||
POS_FOUR = SrcEnum.POS_FOUR
|
||||
NEG_FOUR = SrcEnum.NEG_FOUR
|
||||
INV_2PI = SrcEnum.INV_2PI
|
||||
VCCZ = SrcEnum.VCCZ
|
||||
EXECZ = SrcEnum.EXECZ
|
||||
SCC = SrcEnum.SCC
|
||||
LDS_DIRECT = SrcEnum.LDS_DIRECT
|
||||
OFF = NULL
|
||||
v_dual_and_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_AND_B32)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,34 +1,100 @@
|
||||
# autogenerated from AMD RDNA4 ISA PDF by pdf.py - do not edit
|
||||
# autogenerated from AMD ISA PDF by pdf.py - do not edit
|
||||
from enum import IntEnum
|
||||
|
||||
class SrcEnum(IntEnum):
|
||||
VCC_LO = 106
|
||||
VCC_HI = 107
|
||||
NULL = 124
|
||||
M0 = 125
|
||||
EXEC_LO = 126
|
||||
EXEC_HI = 127
|
||||
ZERO = 128
|
||||
DPP8 = 233
|
||||
DPP8FI = 234
|
||||
SHARED_BASE = 235
|
||||
SHARED_LIMIT = 236
|
||||
PRIVATE_BASE = 237
|
||||
PRIVATE_LIMIT = 238
|
||||
POS_HALF = 240
|
||||
NEG_HALF = 241
|
||||
POS_ONE = 242
|
||||
NEG_ONE = 243
|
||||
POS_TWO = 244
|
||||
NEG_TWO = 245
|
||||
POS_FOUR = 246
|
||||
NEG_FOUR = 247
|
||||
INV_2PI = 248
|
||||
DPP16 = 250
|
||||
VCCZ = 251
|
||||
EXECZ = 252
|
||||
SCC = 253
|
||||
LDS_DIRECT = 254
|
||||
class BufFmt(IntEnum):
|
||||
BUF_FMT_8_UNORM = 1
|
||||
BUF_FMT_8_SNORM = 2
|
||||
BUF_FMT_8_USCALED = 3
|
||||
BUF_FMT_8_SSCALED = 4
|
||||
BUF_FMT_8_UINT = 5
|
||||
BUF_FMT_8_SINT = 6
|
||||
BUF_FMT_16_UNORM = 7
|
||||
BUF_FMT_16_SNORM = 8
|
||||
BUF_FMT_16_USCALED = 9
|
||||
BUF_FMT_16_SSCALED = 10
|
||||
BUF_FMT_16_UINT = 11
|
||||
BUF_FMT_16_SINT = 12
|
||||
BUF_FMT_16_FLOAT = 13
|
||||
BUF_FMT_8_8_UNORM = 14
|
||||
BUF_FMT_8_8_SNORM = 15
|
||||
BUF_FMT_8_8_USCALED = 16
|
||||
BUF_FMT_8_8_SSCALED = 17
|
||||
BUF_FMT_8_8_UINT = 18
|
||||
BUF_FMT_8_8_SINT = 19
|
||||
BUF_FMT_32_UINT = 20
|
||||
BUF_FMT_32_SINT = 21
|
||||
BUF_FMT_32_FLOAT = 22
|
||||
BUF_FMT_16_16_UNORM = 23
|
||||
BUF_FMT_16_16_SNORM = 24
|
||||
BUF_FMT_16_16_USCALED = 25
|
||||
BUF_FMT_16_16_SSCALED = 26
|
||||
BUF_FMT_16_16_UINT = 27
|
||||
BUF_FMT_16_16_SINT = 28
|
||||
BUF_FMT_16_16_FLOAT = 29
|
||||
BUF_FMT_10_11_11_FLOAT = 30
|
||||
BUF_FMT_11_11_10_FLOAT = 31
|
||||
BUF_FMT_10_10_10_2_UNORM = 32
|
||||
BUF_FMT_10_10_10_2_SNORM = 33
|
||||
BUF_FMT_10_10_10_2_UINT = 34
|
||||
BUF_FMT_10_10_10_2_SINT = 35
|
||||
BUF_FMT_2_10_10_10_UNORM = 36
|
||||
BUF_FMT_2_10_10_10_SNORM = 37
|
||||
BUF_FMT_2_10_10_10_USCALED = 38
|
||||
BUF_FMT_2_10_10_10_SSCALED = 39
|
||||
BUF_FMT_2_10_10_10_UINT = 40
|
||||
BUF_FMT_2_10_10_10_SINT = 41
|
||||
BUF_FMT_8_8_8_8_UNORM = 42
|
||||
BUF_FMT_8_8_8_8_SNORM = 43
|
||||
BUF_FMT_8_8_8_8_USCALED = 44
|
||||
BUF_FMT_8_8_8_8_SSCALED = 45
|
||||
BUF_FMT_8_8_8_8_UINT = 46
|
||||
BUF_FMT_8_8_8_8_SINT = 47
|
||||
BUF_FMT_32_32_UINT = 48
|
||||
BUF_FMT_32_32_SINT = 49
|
||||
BUF_FMT_32_32_FLOAT = 50
|
||||
BUF_FMT_16_16_16_16_UNORM = 51
|
||||
BUF_FMT_16_16_16_16_SNORM = 52
|
||||
BUF_FMT_16_16_16_16_USCALED = 53
|
||||
BUF_FMT_16_16_16_16_SSCALED = 54
|
||||
BUF_FMT_16_16_16_16_UINT = 55
|
||||
BUF_FMT_16_16_16_16_SINT = 56
|
||||
BUF_FMT_16_16_16_16_FLOAT = 57
|
||||
BUF_FMT_32_32_32_UINT = 58
|
||||
BUF_FMT_32_32_32_SINT = 59
|
||||
BUF_FMT_32_32_32_FLOAT = 60
|
||||
BUF_FMT_32_32_32_32_UINT = 61
|
||||
BUF_FMT_32_32_32_32_SINT = 62
|
||||
BUF_FMT_32_32_32_32_FLOAT = 63
|
||||
BUF_FMT_8_SRGB = 64
|
||||
BUF_FMT_8_8_SRGB = 65
|
||||
BUF_FMT_8_8_8_8_SRGB = 66
|
||||
BUF_FMT_5_9_9_9_FLOAT = 67
|
||||
BUF_FMT_5_6_5_UNORM = 68
|
||||
BUF_FMT_1_5_5_5_UNORM = 69
|
||||
BUF_FMT_5_5_5_1_UNORM = 70
|
||||
BUF_FMT_4_4_4_4_UNORM = 71
|
||||
BUF_FMT_4_4_UNORM = 72
|
||||
BUF_FMT_1_UNORM = 73
|
||||
BUF_FMT_1_REVERSED_UNORM = 74
|
||||
BUF_FMT_32_FLOAT_CLAMP = 75
|
||||
BUF_FMT_8_24_UNORM = 76
|
||||
BUF_FMT_8_24_UINT = 77
|
||||
BUF_FMT_24_8_UNORM = 78
|
||||
BUF_FMT_24_8_UINT = 79
|
||||
BUF_FMT_X24_8_32_UINT = 80
|
||||
BUF_FMT_X24_8_32_FLOAT = 81
|
||||
BUF_FMT_GB_GR_UNORM = 82
|
||||
BUF_FMT_GB_GR_SNORM = 83
|
||||
BUF_FMT_GB_GR_UINT = 84
|
||||
BUF_FMT_GB_GR_SRGB = 85
|
||||
BUF_FMT_BG_RG_UNORM = 86
|
||||
BUF_FMT_BG_RG_SNORM = 87
|
||||
BUF_FMT_BG_RG_UINT = 88
|
||||
BUF_FMT_BG_RG_SRGB = 89
|
||||
BUF_FMT_BC1_UNORM = 109
|
||||
BUF_FMT_BC1_SRGB = 110
|
||||
BUF_FMT_BC2_UNORM = 111
|
||||
BUF_FMT_BC2_SRGB = 112
|
||||
|
||||
class DSOp(IntEnum):
|
||||
DS_ADD_U32 = 0
|
||||
@@ -1347,7 +1413,6 @@ class VOP3POp(IntEnum):
|
||||
V_SWMMAC_F32_16X16X32_BF8_BF8 = 90
|
||||
|
||||
class VOP3SDOp(IntEnum):
|
||||
DWORD = 1
|
||||
V_ADD_CO_CI_U32 = 288
|
||||
V_SUB_CO_CI_U32 = 289
|
||||
V_SUBREV_CO_CI_U32 = 290
|
||||
@@ -1627,52 +1692,3 @@ class VSCRATCHOp(IntEnum):
|
||||
SCRATCH_STORE_D16_HI_B16 = 37
|
||||
SCRATCH_LOAD_BLOCK = 83
|
||||
SCRATCH_STORE_BLOCK = 84
|
||||
|
||||
class BufFmt(IntEnum):
|
||||
BUF_FMT_8_UNORM = 1
|
||||
BUF_FMT_8_SNORM = 2
|
||||
BUF_FMT_8_USCALED = 3
|
||||
BUF_FMT_8_SSCALED = 4
|
||||
BUF_FMT_8_UINT = 5
|
||||
BUF_FMT_8_SINT = 6
|
||||
BUF_FMT_16_UNORM = 7
|
||||
BUF_FMT_16_SNORM = 8
|
||||
BUF_FMT_16_USCALED = 9
|
||||
BUF_FMT_16_SSCALED = 10
|
||||
BUF_FMT_16_UINT = 11
|
||||
BUF_FMT_16_SINT = 12
|
||||
BUF_FMT_16_FLOAT = 13
|
||||
BUF_FMT_8_8_UNORM = 14
|
||||
BUF_FMT_8_8_SNORM = 15
|
||||
BUF_FMT_8_8_USCALED = 16
|
||||
BUF_FMT_8_8_SSCALED = 17
|
||||
BUF_FMT_8_8_UINT = 18
|
||||
BUF_FMT_8_8_SINT = 19
|
||||
BUF_FMT_32_UINT = 20
|
||||
BUF_FMT_32_SINT = 21
|
||||
BUF_FMT_32_FLOAT = 22
|
||||
BUF_FMT_16_16_UNORM = 23
|
||||
BUF_FMT_10_10_10_2_UNORM = 32
|
||||
BUF_FMT_10_10_10_2_SNORM = 33
|
||||
BUF_FMT_10_10_10_2_UINT = 34
|
||||
BUF_FMT_10_10_10_2_SINT = 35
|
||||
BUF_FMT_2_10_10_10_UNORM = 36
|
||||
BUF_FMT_2_10_10_10_SNORM = 37
|
||||
BUF_FMT_2_10_10_10_USCALED = 38
|
||||
BUF_FMT_2_10_10_10_SSCALED = 39
|
||||
BUF_FMT_2_10_10_10_UINT = 40
|
||||
BUF_FMT_2_10_10_10_SINT = 41
|
||||
BUF_FMT_8_8_8_8_UNORM = 42
|
||||
BUF_FMT_8_8_8_8_SNORM = 43
|
||||
BUF_FMT_8_8_8_8_USCALED = 44
|
||||
BUF_FMT_8_8_8_8_SSCALED = 45
|
||||
BUF_FMT_8_8_8_8_UINT = 46
|
||||
BUF_FMT_8_8_8_8_SINT = 47
|
||||
BUF_FMT_32_32_UINT = 48
|
||||
BUF_FMT_32_32_SINT = 49
|
||||
BUF_FMT_32_32_FLOAT = 50
|
||||
BUF_FMT_16_16_16_16_UNORM = 51
|
||||
BUF_FMT_16_16_16_16_SNORM = 52
|
||||
BUF_FMT_16_16_16_16_USCALED = 53
|
||||
BUF_FMT_16_16_16_16_SSCALED = 54
|
||||
BUF_FMT_16_16_16_16_UINT = 55
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
# autogenerated from AMD RDNA4 ISA PDF by pdf.py - do not edit
|
||||
# autogenerated from AMD ISA PDF by pdf.py - do not edit
|
||||
# ruff: noqa: F401,F403
|
||||
from typing import Annotated
|
||||
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, Inst96, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
|
||||
from extra.assembly.amd.dsl import *
|
||||
from extra.assembly.amd.autogen.rdna4.enum import *
|
||||
import functools
|
||||
|
||||
# instruction formats
|
||||
class DPP16(Inst64):
|
||||
class DPP16(Inst):
|
||||
src0:Src = bits[39:32]
|
||||
dpp_ctrl = bits[48:40]
|
||||
fi = bits[50]
|
||||
@@ -18,7 +17,7 @@ class DPP16(Inst64):
|
||||
bank_mask = bits[59:56]
|
||||
row_mask = bits[63:60]
|
||||
|
||||
class DPP8(Inst64):
|
||||
class DPP8(Inst):
|
||||
src0:Src = bits[39:32]
|
||||
lane_sel0 = bits[42:40]
|
||||
lane_sel1 = bits[45:43]
|
||||
@@ -29,7 +28,17 @@ class DPP8(Inst64):
|
||||
lane_sel6 = bits[60:58]
|
||||
lane_sel7 = bits[63:61]
|
||||
|
||||
class SMEM(Inst64):
|
||||
class DS(Inst):
|
||||
encoding = bits[31:26] == 0b110110
|
||||
op:Annotated[BitField, DSOp] = bits[25:18]
|
||||
vdst:VGPRField = bits[63:56]
|
||||
addr:VGPRField = bits[39:32]
|
||||
data0:VGPRField = bits[47:40]
|
||||
data1:VGPRField = bits[55:48]
|
||||
offset0 = bits[7:0]
|
||||
offset1 = bits[15:8]
|
||||
|
||||
class SMEM(Inst):
|
||||
encoding = bits[31:26] == 0b111101
|
||||
op:Annotated[BitField, SMEMOp] = bits[18:13]
|
||||
sdata:SGPRField = bits[12:6]
|
||||
@@ -39,153 +48,116 @@ class SMEM(Inst64):
|
||||
th = bits[24:23]
|
||||
ioffset = bits[55:32]
|
||||
|
||||
class SOP1(Inst32):
|
||||
class SOP1(Inst):
|
||||
encoding = bits[31:23] == 0b101111101
|
||||
op:Annotated[BitField, SOP1Op] = bits[15:8]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
|
||||
class SOP2(Inst32):
|
||||
class SOP2(Inst):
|
||||
encoding = bits[31:30] == 0b10
|
||||
op:Annotated[BitField, SOP2Op] = bits[29:23]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
ssrc1:SSrc = bits[15:8]
|
||||
|
||||
class SOPC(Inst32):
|
||||
class SOPC(Inst):
|
||||
encoding = bits[31:23] == 0b101111110
|
||||
op:Annotated[BitField, SOPCOp] = bits[22:16]
|
||||
ssrc0:SSrc = bits[7:0]
|
||||
ssrc1:SSrc = bits[15:8]
|
||||
|
||||
class SOPK(Inst32):
|
||||
class SOPK(Inst):
|
||||
encoding = bits[31:28] == 0b1011
|
||||
op:Annotated[BitField, SOPKOp] = bits[27:23]
|
||||
sdst:SGPRField = bits[22:16]
|
||||
simm16:SImm = bits[15:0]
|
||||
|
||||
class SOPP(Inst32):
|
||||
class SOPP(Inst):
|
||||
encoding = bits[31:23] == 0b101111111
|
||||
op:Annotated[BitField, SOPPOp] = bits[22:16]
|
||||
simm16:SImm = bits[15:0]
|
||||
|
||||
class VBUFFER(Inst96):
|
||||
class VBUFFER(Inst):
|
||||
encoding = bits[31:26] == 0b110001
|
||||
soffset:SSrc = bits[6:0]
|
||||
op:Annotated[BitField, VBUFFEROp] = bits[21:14]
|
||||
tfe = bits[22]
|
||||
vdata:VGPRField = bits[39:32]
|
||||
rsrc = bits[49:41]
|
||||
scope = bits[51:50]
|
||||
th = bits[54:52]
|
||||
vaddr:VGPRField = bits[71:64]
|
||||
soffset:SSrc = bits[6:0]
|
||||
format = bits[61:55]
|
||||
offen = bits[62]
|
||||
idxen = bits[63]
|
||||
vaddr:VGPRField = bits[71:64]
|
||||
tfe = bits[22]
|
||||
rsrc = bits[49:41]
|
||||
scope = bits[51:50]
|
||||
th = bits[54:52]
|
||||
ioffset = bits[95:72]
|
||||
|
||||
class VDS(Inst64):
|
||||
encoding = bits[31:26] == 0b110110
|
||||
offset0 = bits[7:0]
|
||||
offset1 = bits[15:8]
|
||||
op = bits[25:18]
|
||||
addr:VGPRField = bits[39:32]
|
||||
data0:VGPRField = bits[47:40]
|
||||
data1:VGPRField = bits[55:48]
|
||||
vdst:VGPRField = bits[63:56]
|
||||
|
||||
class VDSDIR(Inst64):
|
||||
encoding = bits[31:24] == 0b11001101
|
||||
class VDSDIR(Inst):
|
||||
encoding = bits[31:24] == 0b11001110
|
||||
op:Annotated[BitField, VDSDIROp] = bits[21:20]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
waitexp = bits[10:8]
|
||||
opsel = bits[14:11]
|
||||
cm = bits[15]
|
||||
op:Annotated[BitField, VDSDIROp] = bits[20:16]
|
||||
src0:Src = bits[40:32]
|
||||
src1:Src = bits[49:41]
|
||||
src2:Src = bits[58:50]
|
||||
neg = bits[63:61]
|
||||
attr = bits[15:10]
|
||||
attr_chan = bits[9:8]
|
||||
wait_va = bits[19:16]
|
||||
wait_vmvsrc = bits[23]
|
||||
|
||||
class VEXPORT(Inst64):
|
||||
class VEXPORT(Inst):
|
||||
encoding = bits[31:26] == 0b111110
|
||||
vsrc0:VGPRField = bits[39:32]
|
||||
vsrc1:VGPRField = bits[47:40]
|
||||
vsrc2:VGPRField = bits[55:48]
|
||||
vsrc3:VGPRField = bits[63:56]
|
||||
en = bits[3:0]
|
||||
target = bits[9:4]
|
||||
done = bits[11]
|
||||
row = bits[13]
|
||||
vsrc0 = bits[39:32]
|
||||
vsrc1:VGPRField = bits[47:40]
|
||||
vsrc2 = bits[55:48]
|
||||
vsrc3 = bits[63:56]
|
||||
|
||||
class VFLAT(Inst96):
|
||||
encoding = bits[31:24] == 0b11101100
|
||||
saddr:SSrc = bits[6:0]
|
||||
op:Annotated[BitField, VFLATOp] = bits[20:14]
|
||||
vdst:VGPRField = bits[39:32]
|
||||
sve = bits[49]
|
||||
scope = bits[51:50]
|
||||
th = bits[54:52]
|
||||
vsrc = bits[62:55]
|
||||
vaddr:VGPRField = bits[71:64]
|
||||
ioffset = bits[95:72]
|
||||
|
||||
class VGLOBAL(Inst96):
|
||||
encoding = bits[31:24] == 0b11101110
|
||||
saddr:SSrc = bits[6:0]
|
||||
op:Annotated[BitField, VGLOBALOp] = bits[20:14]
|
||||
vdst:VGPRField = bits[39:32]
|
||||
sve = bits[49]
|
||||
scope = bits[51:50]
|
||||
th = bits[54:52]
|
||||
vsrc = bits[62:55]
|
||||
vaddr:VGPRField = bits[71:64]
|
||||
ioffset = bits[95:72]
|
||||
|
||||
class VIMAGE(Inst96):
|
||||
class VIMAGE(Inst):
|
||||
encoding = bits[31:26] == 0b110100
|
||||
op:Annotated[BitField, VIMAGEOp] = bits[21:14]
|
||||
vdata:VGPRField = bits[39:32]
|
||||
dmask = bits[25:22]
|
||||
dim = bits[2:0]
|
||||
tfe = bits[55]
|
||||
r128 = bits[4]
|
||||
d16 = bits[5]
|
||||
a16 = bits[6]
|
||||
op:Annotated[BitField, VIMAGEOp] = bits[21:14]
|
||||
dmask = bits[25:22]
|
||||
vdata:VGPRField = bits[39:32]
|
||||
rsrc = bits[49:41]
|
||||
scope = bits[51:50]
|
||||
th = bits[54:52]
|
||||
tfe = bits[55]
|
||||
vaddr4 = bits[56:63]
|
||||
vaddr0 = bits[71:64]
|
||||
vaddr1 = bits[79:72]
|
||||
vaddr2 = bits[87:80]
|
||||
vaddr3 = bits[95:88]
|
||||
|
||||
class VINTERP(Inst64):
|
||||
class VINTERP(Inst):
|
||||
encoding = bits[31:24] == 0b11001101
|
||||
op:Annotated[BitField, VINTERPOp] = bits[20:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
src0:Src = bits[40:32]
|
||||
src1:Src = bits[49:41]
|
||||
src2:Src = bits[58:50]
|
||||
waitexp = bits[10:8]
|
||||
opsel = bits[14:11]
|
||||
neg = bits[63:61]
|
||||
opsel = bits[14:11]
|
||||
waitexp = bits[10:8]
|
||||
cm = bits[15]
|
||||
|
||||
class VOP1(Inst32):
|
||||
encoding = bits[31:25] == 0b111111
|
||||
class VOP1(Inst):
|
||||
encoding = bits[31:25] == 0b0111111
|
||||
op:Annotated[BitField, VOP1Op] = bits[15:9]
|
||||
vdst:VGPRField = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
|
||||
class VOP2(Inst32):
|
||||
encoding = bits[31] == 0
|
||||
class VOP2(Inst):
|
||||
encoding = bits[31] == 0b0
|
||||
op:Annotated[BitField, VOP2Op] = bits[30:25]
|
||||
vdst:VGPRField = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
vsrc1:VGPRField = bits[16:9]
|
||||
|
||||
class VOP3(Inst64):
|
||||
class VOP3(Inst):
|
||||
encoding = bits[31:26] == 0b110101
|
||||
op:Annotated[BitField, VOP3Op] = bits[25:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
@@ -198,9 +170,8 @@ class VOP3(Inst64):
|
||||
opsel = bits[14:11]
|
||||
cm = bits[15]
|
||||
|
||||
class VOP3P(Inst64):
|
||||
class VOP3P(Inst):
|
||||
encoding = bits[31:24] == 0b11001100
|
||||
_defaults = {'opsel_hi': 3, 'opsel_hi2': 1}
|
||||
op:Annotated[BitField, VOP3POp] = bits[22:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
src0:Src = bits[40:32]
|
||||
@@ -213,7 +184,7 @@ class VOP3P(Inst64):
|
||||
opsel_hi2 = bits[14]
|
||||
cm = bits[15]
|
||||
|
||||
class VOP3SD(Inst64):
|
||||
class VOP3SD(Inst):
|
||||
encoding = bits[31:26] == 0b110101
|
||||
op:Annotated[BitField, VOP3SDOp] = bits[25:16]
|
||||
vdst:VGPRField = bits[7:0]
|
||||
@@ -221,38 +192,38 @@ class VOP3SD(Inst64):
|
||||
src0:Src = bits[40:32]
|
||||
src1:Src = bits[49:41]
|
||||
src2:Src = bits[58:50]
|
||||
cm = bits[15]
|
||||
omod = bits[60:59]
|
||||
neg = bits[63:61]
|
||||
cm = bits[15]
|
||||
|
||||
class VOPC(Inst32):
|
||||
encoding = bits[31:25] == 0b111110
|
||||
class VOPC(Inst):
|
||||
encoding = bits[31:25] == 0b0111110
|
||||
op:Annotated[BitField, VOPCOp] = bits[24:17]
|
||||
src0:Src = bits[8:0]
|
||||
vsrc1:VGPRField = bits[16:9]
|
||||
|
||||
class VOPD(Inst64):
|
||||
class VOPD(Inst):
|
||||
encoding = bits[31:26] == 0b110010
|
||||
opx:Annotated[BitField, VOPDOp] = bits[25:22]
|
||||
opy:Annotated[BitField, VOPDOp] = bits[21:17]
|
||||
vdstx:VGPRField = bits[63:56]
|
||||
vdstx = bits[63:56]
|
||||
vdsty:VDSTYEnc = bits[55:49]
|
||||
srcx0:Src = bits[8:0]
|
||||
vsrcx1:VGPRField = bits[16:9]
|
||||
srcy0:Src = bits[40:32]
|
||||
vsrcy1:VGPRField = bits[48:41]
|
||||
vsrcx1 = bits[16:9]
|
||||
vsrcy1 = bits[48:41]
|
||||
|
||||
class VSAMPLE(Inst96):
|
||||
class VSAMPLE(Inst):
|
||||
encoding = bits[31:26] == 0b111001
|
||||
op:Annotated[BitField, VSAMPLEOp] = bits[21:14]
|
||||
vdata:VGPRField = bits[39:32]
|
||||
dmask = bits[25:22]
|
||||
dim = bits[2:0]
|
||||
tfe = bits[3]
|
||||
unrm = bits[13]
|
||||
r128 = bits[4]
|
||||
d16 = bits[5]
|
||||
a16 = bits[6]
|
||||
unrm = bits[13]
|
||||
op:Annotated[BitField, VSAMPLEOp] = bits[21:14]
|
||||
dmask = bits[25:22]
|
||||
vdata:VGPRField = bits[39:32]
|
||||
lwe = bits[40]
|
||||
rsrc = bits[49:41]
|
||||
scope = bits[51:50]
|
||||
@@ -263,19 +234,130 @@ class VSAMPLE(Inst96):
|
||||
vaddr2 = bits[87:80]
|
||||
vaddr3 = bits[95:88]
|
||||
|
||||
class VSCRATCH(Inst96):
|
||||
encoding = bits[31:24] == 0b11101101
|
||||
saddr:SSrc = bits[6:0]
|
||||
op:Annotated[BitField, VSCRATCHOp] = bits[20:14]
|
||||
vdst:VGPRField = bits[39:32]
|
||||
sve = bits[49]
|
||||
scope = bits[51:50]
|
||||
th = bits[54:52]
|
||||
vsrc = bits[62:55]
|
||||
vaddr:VGPRField = bits[71:64]
|
||||
ioffset = bits[95:72]
|
||||
|
||||
# instruction helpers
|
||||
ds_add_u32 = functools.partial(DS, DSOp.DS_ADD_U32)
|
||||
ds_sub_u32 = functools.partial(DS, DSOp.DS_SUB_U32)
|
||||
ds_rsub_u32 = functools.partial(DS, DSOp.DS_RSUB_U32)
|
||||
ds_inc_u32 = functools.partial(DS, DSOp.DS_INC_U32)
|
||||
ds_dec_u32 = functools.partial(DS, DSOp.DS_DEC_U32)
|
||||
ds_min_i32 = functools.partial(DS, DSOp.DS_MIN_I32)
|
||||
ds_max_i32 = functools.partial(DS, DSOp.DS_MAX_I32)
|
||||
ds_min_u32 = functools.partial(DS, DSOp.DS_MIN_U32)
|
||||
ds_max_u32 = functools.partial(DS, DSOp.DS_MAX_U32)
|
||||
ds_and_b32 = functools.partial(DS, DSOp.DS_AND_B32)
|
||||
ds_or_b32 = functools.partial(DS, DSOp.DS_OR_B32)
|
||||
ds_xor_b32 = functools.partial(DS, DSOp.DS_XOR_B32)
|
||||
ds_mskor_b32 = functools.partial(DS, DSOp.DS_MSKOR_B32)
|
||||
ds_store_b32 = functools.partial(DS, DSOp.DS_STORE_B32)
|
||||
ds_store_2addr_b32 = functools.partial(DS, DSOp.DS_STORE_2ADDR_B32)
|
||||
ds_store_2addr_stride64_b32 = functools.partial(DS, DSOp.DS_STORE_2ADDR_STRIDE64_B32)
|
||||
ds_cmpstore_b32 = functools.partial(DS, DSOp.DS_CMPSTORE_B32)
|
||||
ds_min_num_f32 = functools.partial(DS, DSOp.DS_MIN_NUM_F32)
|
||||
ds_max_num_f32 = functools.partial(DS, DSOp.DS_MAX_NUM_F32)
|
||||
ds_nop = functools.partial(DS, DSOp.DS_NOP)
|
||||
ds_add_f32 = functools.partial(DS, DSOp.DS_ADD_F32)
|
||||
ds_store_b8 = functools.partial(DS, DSOp.DS_STORE_B8)
|
||||
ds_store_b16 = functools.partial(DS, DSOp.DS_STORE_B16)
|
||||
ds_add_rtn_u32 = functools.partial(DS, DSOp.DS_ADD_RTN_U32)
|
||||
ds_sub_rtn_u32 = functools.partial(DS, DSOp.DS_SUB_RTN_U32)
|
||||
ds_rsub_rtn_u32 = functools.partial(DS, DSOp.DS_RSUB_RTN_U32)
|
||||
ds_inc_rtn_u32 = functools.partial(DS, DSOp.DS_INC_RTN_U32)
|
||||
ds_dec_rtn_u32 = functools.partial(DS, DSOp.DS_DEC_RTN_U32)
|
||||
ds_min_rtn_i32 = functools.partial(DS, DSOp.DS_MIN_RTN_I32)
|
||||
ds_max_rtn_i32 = functools.partial(DS, DSOp.DS_MAX_RTN_I32)
|
||||
ds_min_rtn_u32 = functools.partial(DS, DSOp.DS_MIN_RTN_U32)
|
||||
ds_max_rtn_u32 = functools.partial(DS, DSOp.DS_MAX_RTN_U32)
|
||||
ds_and_rtn_b32 = functools.partial(DS, DSOp.DS_AND_RTN_B32)
|
||||
ds_or_rtn_b32 = functools.partial(DS, DSOp.DS_OR_RTN_B32)
|
||||
ds_xor_rtn_b32 = functools.partial(DS, DSOp.DS_XOR_RTN_B32)
|
||||
ds_mskor_rtn_b32 = functools.partial(DS, DSOp.DS_MSKOR_RTN_B32)
|
||||
ds_storexchg_rtn_b32 = functools.partial(DS, DSOp.DS_STOREXCHG_RTN_B32)
|
||||
ds_storexchg_2addr_rtn_b32 = functools.partial(DS, DSOp.DS_STOREXCHG_2ADDR_RTN_B32)
|
||||
ds_storexchg_2addr_stride64_rtn_b32 = functools.partial(DS, DSOp.DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32)
|
||||
ds_cmpstore_rtn_b32 = functools.partial(DS, DSOp.DS_CMPSTORE_RTN_B32)
|
||||
ds_min_num_rtn_f32 = functools.partial(DS, DSOp.DS_MIN_NUM_RTN_F32)
|
||||
ds_max_num_rtn_f32 = functools.partial(DS, DSOp.DS_MAX_NUM_RTN_F32)
|
||||
ds_swizzle_b32 = functools.partial(DS, DSOp.DS_SWIZZLE_B32)
|
||||
ds_load_b32 = functools.partial(DS, DSOp.DS_LOAD_B32)
|
||||
ds_load_2addr_b32 = functools.partial(DS, DSOp.DS_LOAD_2ADDR_B32)
|
||||
ds_load_2addr_stride64_b32 = functools.partial(DS, DSOp.DS_LOAD_2ADDR_STRIDE64_B32)
|
||||
ds_load_i8 = functools.partial(DS, DSOp.DS_LOAD_I8)
|
||||
ds_load_u8 = functools.partial(DS, DSOp.DS_LOAD_U8)
|
||||
ds_load_i16 = functools.partial(DS, DSOp.DS_LOAD_I16)
|
||||
ds_load_u16 = functools.partial(DS, DSOp.DS_LOAD_U16)
|
||||
ds_consume = functools.partial(DS, DSOp.DS_CONSUME)
|
||||
ds_append = functools.partial(DS, DSOp.DS_APPEND)
|
||||
ds_add_u64 = functools.partial(DS, DSOp.DS_ADD_U64)
|
||||
ds_sub_u64 = functools.partial(DS, DSOp.DS_SUB_U64)
|
||||
ds_rsub_u64 = functools.partial(DS, DSOp.DS_RSUB_U64)
|
||||
ds_inc_u64 = functools.partial(DS, DSOp.DS_INC_U64)
|
||||
ds_dec_u64 = functools.partial(DS, DSOp.DS_DEC_U64)
|
||||
ds_min_i64 = functools.partial(DS, DSOp.DS_MIN_I64)
|
||||
ds_max_i64 = functools.partial(DS, DSOp.DS_MAX_I64)
|
||||
ds_min_u64 = functools.partial(DS, DSOp.DS_MIN_U64)
|
||||
ds_max_u64 = functools.partial(DS, DSOp.DS_MAX_U64)
|
||||
ds_and_b64 = functools.partial(DS, DSOp.DS_AND_B64)
|
||||
ds_or_b64 = functools.partial(DS, DSOp.DS_OR_B64)
|
||||
ds_xor_b64 = functools.partial(DS, DSOp.DS_XOR_B64)
|
||||
ds_mskor_b64 = functools.partial(DS, DSOp.DS_MSKOR_B64)
|
||||
ds_store_b64 = functools.partial(DS, DSOp.DS_STORE_B64)
|
||||
ds_store_2addr_b64 = functools.partial(DS, DSOp.DS_STORE_2ADDR_B64)
|
||||
ds_store_2addr_stride64_b64 = functools.partial(DS, DSOp.DS_STORE_2ADDR_STRIDE64_B64)
|
||||
ds_cmpstore_b64 = functools.partial(DS, DSOp.DS_CMPSTORE_B64)
|
||||
ds_min_num_f64 = functools.partial(DS, DSOp.DS_MIN_NUM_F64)
|
||||
ds_max_num_f64 = functools.partial(DS, DSOp.DS_MAX_NUM_F64)
|
||||
ds_add_rtn_u64 = functools.partial(DS, DSOp.DS_ADD_RTN_U64)
|
||||
ds_sub_rtn_u64 = functools.partial(DS, DSOp.DS_SUB_RTN_U64)
|
||||
ds_rsub_rtn_u64 = functools.partial(DS, DSOp.DS_RSUB_RTN_U64)
|
||||
ds_inc_rtn_u64 = functools.partial(DS, DSOp.DS_INC_RTN_U64)
|
||||
ds_dec_rtn_u64 = functools.partial(DS, DSOp.DS_DEC_RTN_U64)
|
||||
ds_min_rtn_i64 = functools.partial(DS, DSOp.DS_MIN_RTN_I64)
|
||||
ds_max_rtn_i64 = functools.partial(DS, DSOp.DS_MAX_RTN_I64)
|
||||
ds_min_rtn_u64 = functools.partial(DS, DSOp.DS_MIN_RTN_U64)
|
||||
ds_max_rtn_u64 = functools.partial(DS, DSOp.DS_MAX_RTN_U64)
|
||||
ds_and_rtn_b64 = functools.partial(DS, DSOp.DS_AND_RTN_B64)
|
||||
ds_or_rtn_b64 = functools.partial(DS, DSOp.DS_OR_RTN_B64)
|
||||
ds_xor_rtn_b64 = functools.partial(DS, DSOp.DS_XOR_RTN_B64)
|
||||
ds_mskor_rtn_b64 = functools.partial(DS, DSOp.DS_MSKOR_RTN_B64)
|
||||
ds_storexchg_rtn_b64 = functools.partial(DS, DSOp.DS_STOREXCHG_RTN_B64)
|
||||
ds_storexchg_2addr_rtn_b64 = functools.partial(DS, DSOp.DS_STOREXCHG_2ADDR_RTN_B64)
|
||||
ds_storexchg_2addr_stride64_rtn_b64 = functools.partial(DS, DSOp.DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64)
|
||||
ds_cmpstore_rtn_b64 = functools.partial(DS, DSOp.DS_CMPSTORE_RTN_B64)
|
||||
ds_min_num_rtn_f64 = functools.partial(DS, DSOp.DS_MIN_NUM_RTN_F64)
|
||||
ds_max_num_rtn_f64 = functools.partial(DS, DSOp.DS_MAX_NUM_RTN_F64)
|
||||
ds_load_b64 = functools.partial(DS, DSOp.DS_LOAD_B64)
|
||||
ds_load_2addr_b64 = functools.partial(DS, DSOp.DS_LOAD_2ADDR_B64)
|
||||
ds_load_2addr_stride64_b64 = functools.partial(DS, DSOp.DS_LOAD_2ADDR_STRIDE64_B64)
|
||||
ds_add_rtn_f32 = functools.partial(DS, DSOp.DS_ADD_RTN_F32)
|
||||
ds_condxchg32_rtn_b64 = functools.partial(DS, DSOp.DS_CONDXCHG32_RTN_B64)
|
||||
ds_cond_sub_u32 = functools.partial(DS, DSOp.DS_COND_SUB_U32)
|
||||
ds_sub_clamp_u32 = functools.partial(DS, DSOp.DS_SUB_CLAMP_U32)
|
||||
ds_pk_add_f16 = functools.partial(DS, DSOp.DS_PK_ADD_F16)
|
||||
ds_pk_add_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_BF16)
|
||||
ds_store_b8_d16_hi = functools.partial(DS, DSOp.DS_STORE_B8_D16_HI)
|
||||
ds_store_b16_d16_hi = functools.partial(DS, DSOp.DS_STORE_B16_D16_HI)
|
||||
ds_load_u8_d16 = functools.partial(DS, DSOp.DS_LOAD_U8_D16)
|
||||
ds_load_u8_d16_hi = functools.partial(DS, DSOp.DS_LOAD_U8_D16_HI)
|
||||
ds_load_i8_d16 = functools.partial(DS, DSOp.DS_LOAD_I8_D16)
|
||||
ds_load_i8_d16_hi = functools.partial(DS, DSOp.DS_LOAD_I8_D16_HI)
|
||||
ds_load_u16_d16 = functools.partial(DS, DSOp.DS_LOAD_U16_D16)
|
||||
ds_load_u16_d16_hi = functools.partial(DS, DSOp.DS_LOAD_U16_D16_HI)
|
||||
ds_cond_sub_rtn_u32 = functools.partial(DS, DSOp.DS_COND_SUB_RTN_U32)
|
||||
ds_sub_clamp_rtn_u32 = functools.partial(DS, DSOp.DS_SUB_CLAMP_RTN_U32)
|
||||
ds_pk_add_rtn_f16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_F16)
|
||||
ds_pk_add_rtn_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_BF16)
|
||||
ds_store_addtid_b32 = functools.partial(DS, DSOp.DS_STORE_ADDTID_B32)
|
||||
ds_load_addtid_b32 = functools.partial(DS, DSOp.DS_LOAD_ADDTID_B32)
|
||||
ds_permute_b32 = functools.partial(DS, DSOp.DS_PERMUTE_B32)
|
||||
ds_bpermute_b32 = functools.partial(DS, DSOp.DS_BPERMUTE_B32)
|
||||
ds_bpermute_fi_b32 = functools.partial(DS, DSOp.DS_BPERMUTE_FI_B32)
|
||||
ds_store_b96 = functools.partial(DS, DSOp.DS_STORE_B96)
|
||||
ds_store_b128 = functools.partial(DS, DSOp.DS_STORE_B128)
|
||||
ds_bvh_stack_push4_pop1_rtn_b32 = functools.partial(DS, DSOp.DS_BVH_STACK_PUSH4_POP1_RTN_B32)
|
||||
ds_bvh_stack_push8_pop1_rtn_b32 = functools.partial(DS, DSOp.DS_BVH_STACK_PUSH8_POP1_RTN_B32)
|
||||
ds_bvh_stack_push8_pop2_rtn_b64 = functools.partial(DS, DSOp.DS_BVH_STACK_PUSH8_POP2_RTN_B64)
|
||||
ds_load_b96 = functools.partial(DS, DSOp.DS_LOAD_B96)
|
||||
ds_load_b128 = functools.partial(DS, DSOp.DS_LOAD_B128)
|
||||
s_load_b32 = functools.partial(SMEM, SMEMOp.S_LOAD_B32)
|
||||
s_load_b64 = functools.partial(SMEM, SMEMOp.S_LOAD_B64)
|
||||
s_load_b128 = functools.partial(SMEM, SMEMOp.S_LOAD_B128)
|
||||
@@ -647,126 +729,6 @@ tbuffer_store_d16_format_xyz = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STOR
|
||||
tbuffer_store_d16_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_D16_FORMAT_XYZW)
|
||||
ds_param_load = functools.partial(VDSDIR, VDSDIROp.DS_PARAM_LOAD)
|
||||
ds_direct_load = functools.partial(VDSDIR, VDSDIROp.DS_DIRECT_LOAD)
|
||||
flat_load_u8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_U8)
|
||||
flat_load_i8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_I8)
|
||||
flat_load_u16 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_U16)
|
||||
flat_load_i16 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_I16)
|
||||
flat_load_b32 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_B32)
|
||||
flat_load_b64 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_B64)
|
||||
flat_load_b96 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_B96)
|
||||
flat_load_b128 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_B128)
|
||||
flat_store_b8 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B8)
|
||||
flat_store_b16 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B16)
|
||||
flat_store_b32 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B32)
|
||||
flat_store_b64 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B64)
|
||||
flat_store_b96 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B96)
|
||||
flat_store_b128 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_B128)
|
||||
flat_load_d16_u8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_U8)
|
||||
flat_load_d16_i8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_I8)
|
||||
flat_load_d16_b16 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_B16)
|
||||
flat_load_d16_hi_u8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_HI_U8)
|
||||
flat_load_d16_hi_i8 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_HI_I8)
|
||||
flat_load_d16_hi_b16 = functools.partial(VFLAT, VFLATOp.FLAT_LOAD_D16_HI_B16)
|
||||
flat_store_d16_hi_b8 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_D16_HI_B8)
|
||||
flat_store_d16_hi_b16 = functools.partial(VFLAT, VFLATOp.FLAT_STORE_D16_HI_B16)
|
||||
flat_atomic_swap_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SWAP_B32)
|
||||
flat_atomic_cmpswap_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_CMPSWAP_B32)
|
||||
flat_atomic_add_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_ADD_U32)
|
||||
flat_atomic_sub_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SUB_U32)
|
||||
flat_atomic_sub_clamp_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SUB_CLAMP_U32)
|
||||
flat_atomic_min_i32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_I32)
|
||||
flat_atomic_min_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_U32)
|
||||
flat_atomic_max_i32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_I32)
|
||||
flat_atomic_max_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_U32)
|
||||
flat_atomic_and_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_AND_B32)
|
||||
flat_atomic_or_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_OR_B32)
|
||||
flat_atomic_xor_b32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_XOR_B32)
|
||||
flat_atomic_inc_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_INC_U32)
|
||||
flat_atomic_dec_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_DEC_U32)
|
||||
flat_atomic_swap_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SWAP_B64)
|
||||
flat_atomic_cmpswap_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_CMPSWAP_B64)
|
||||
flat_atomic_add_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_ADD_U64)
|
||||
flat_atomic_sub_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_SUB_U64)
|
||||
flat_atomic_min_i64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_I64)
|
||||
flat_atomic_min_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_U64)
|
||||
flat_atomic_max_i64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_I64)
|
||||
flat_atomic_max_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_U64)
|
||||
flat_atomic_and_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_AND_B64)
|
||||
flat_atomic_or_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_OR_B64)
|
||||
flat_atomic_xor_b64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_XOR_B64)
|
||||
flat_atomic_inc_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_INC_U64)
|
||||
flat_atomic_dec_u64 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_DEC_U64)
|
||||
flat_atomic_cond_sub_u32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_COND_SUB_U32)
|
||||
flat_atomic_min_num_f32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MIN_NUM_F32)
|
||||
flat_atomic_max_num_f32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_MAX_NUM_F32)
|
||||
flat_atomic_add_f32 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_ADD_F32)
|
||||
flat_atomic_pk_add_f16 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_PK_ADD_F16)
|
||||
flat_atomic_pk_add_bf16 = functools.partial(VFLAT, VFLATOp.FLAT_ATOMIC_PK_ADD_BF16)
|
||||
global_load_u8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_U8)
|
||||
global_load_i8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_I8)
|
||||
global_load_u16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_U16)
|
||||
global_load_i16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_I16)
|
||||
global_load_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_B32)
|
||||
global_load_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_B64)
|
||||
global_load_b96 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_B96)
|
||||
global_load_b128 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_B128)
|
||||
global_store_b8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B8)
|
||||
global_store_b16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B16)
|
||||
global_store_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B32)
|
||||
global_store_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B64)
|
||||
global_store_b96 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B96)
|
||||
global_store_b128 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_B128)
|
||||
global_load_d16_u8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_U8)
|
||||
global_load_d16_i8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_I8)
|
||||
global_load_d16_b16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_B16)
|
||||
global_load_d16_hi_u8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_HI_U8)
|
||||
global_load_d16_hi_i8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_HI_I8)
|
||||
global_load_d16_hi_b16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_D16_HI_B16)
|
||||
global_store_d16_hi_b8 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_D16_HI_B8)
|
||||
global_store_d16_hi_b16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_D16_HI_B16)
|
||||
global_load_addtid_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_ADDTID_B32)
|
||||
global_store_addtid_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_ADDTID_B32)
|
||||
global_inv = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_INV)
|
||||
global_wb = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_WB)
|
||||
global_atomic_swap_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SWAP_B32)
|
||||
global_atomic_cmpswap_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_CMPSWAP_B32)
|
||||
global_atomic_add_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_ADD_U32)
|
||||
global_atomic_sub_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SUB_U32)
|
||||
global_atomic_sub_clamp_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SUB_CLAMP_U32)
|
||||
global_atomic_min_i32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_I32)
|
||||
global_atomic_min_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_U32)
|
||||
global_atomic_max_i32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_I32)
|
||||
global_atomic_max_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_U32)
|
||||
global_atomic_and_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_AND_B32)
|
||||
global_atomic_or_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_OR_B32)
|
||||
global_atomic_xor_b32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_XOR_B32)
|
||||
global_atomic_inc_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_INC_U32)
|
||||
global_atomic_dec_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_DEC_U32)
|
||||
global_atomic_swap_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SWAP_B64)
|
||||
global_atomic_cmpswap_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_CMPSWAP_B64)
|
||||
global_atomic_add_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_ADD_U64)
|
||||
global_atomic_sub_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_SUB_U64)
|
||||
global_atomic_min_i64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_I64)
|
||||
global_atomic_min_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_U64)
|
||||
global_atomic_max_i64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_I64)
|
||||
global_atomic_max_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_U64)
|
||||
global_atomic_and_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_AND_B64)
|
||||
global_atomic_or_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_OR_B64)
|
||||
global_atomic_xor_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_XOR_B64)
|
||||
global_atomic_inc_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_INC_U64)
|
||||
global_atomic_dec_u64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_DEC_U64)
|
||||
global_wbinv = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_WBINV)
|
||||
global_atomic_cond_sub_u32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_COND_SUB_U32)
|
||||
global_atomic_min_num_f32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MIN_NUM_F32)
|
||||
global_atomic_max_num_f32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_MAX_NUM_F32)
|
||||
global_load_block = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_BLOCK)
|
||||
global_store_block = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_STORE_BLOCK)
|
||||
global_atomic_add_f32 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_ADD_F32)
|
||||
global_load_tr_b128 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_TR_B128)
|
||||
global_load_tr_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_LOAD_TR_B64)
|
||||
global_atomic_pk_add_f16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_PK_ADD_F16)
|
||||
global_atomic_pk_add_bf16 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_PK_ADD_BF16)
|
||||
global_atomic_ordered_add_b64 = functools.partial(VGLOBAL, VGLOBALOp.GLOBAL_ATOMIC_ORDERED_ADD_B64)
|
||||
image_load = functools.partial(VIMAGE, VIMAGEOp.IMAGE_LOAD)
|
||||
image_load_mip = functools.partial(VIMAGE, VIMAGEOp.IMAGE_LOAD_MIP)
|
||||
image_load_pck = functools.partial(VIMAGE, VIMAGEOp.IMAGE_LOAD_PCK)
|
||||
@@ -931,8 +893,8 @@ v_add_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_NC_U32)
|
||||
v_sub_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_NC_U32)
|
||||
v_subrev_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_NC_U32)
|
||||
v_fmac_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F32)
|
||||
def v_fmamk_f32_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F32, vdst, src0, vsrc1, literal=K)
|
||||
def v_fmaak_f32_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F32, vdst, src0, vsrc1, literal=K)
|
||||
v_fmamk_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F32)
|
||||
v_fmaak_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F32)
|
||||
v_cvt_pk_rtz_f16_f32_e32 = functools.partial(VOP2, VOP2Op.V_CVT_PK_RTZ_F16_F32)
|
||||
v_min_num_f16_e32 = functools.partial(VOP2, VOP2Op.V_MIN_NUM_F16)
|
||||
v_max_num_f16_e32 = functools.partial(VOP2, VOP2Op.V_MAX_NUM_F16)
|
||||
@@ -941,8 +903,8 @@ v_sub_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUB_F16)
|
||||
v_subrev_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_F16)
|
||||
v_mul_f16_e32 = functools.partial(VOP2, VOP2Op.V_MUL_F16)
|
||||
v_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F16)
|
||||
def v_fmamk_f16_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F16, vdst, src0, vsrc1, literal=K)
|
||||
def v_fmaak_f16_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F16, vdst, src0, vsrc1, literal=K)
|
||||
v_fmamk_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAMK_F16)
|
||||
v_fmaak_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAAK_F16)
|
||||
v_ldexp_f16_e32 = functools.partial(VOP2, VOP2Op.V_LDEXP_F16)
|
||||
v_pk_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_PK_FMAC_F16)
|
||||
v_cmp_lt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_F16)
|
||||
@@ -1435,7 +1397,6 @@ v_swmmac_f32_16x16x32_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16
|
||||
v_swmmac_f32_16x16x32_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_FP8_BF8)
|
||||
v_swmmac_f32_16x16x32_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_BF8_FP8)
|
||||
v_swmmac_f32_16x16x32_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_BF8_BF8)
|
||||
dword = functools.partial(VOP3SD, VOP3SDOp.DWORD)
|
||||
v_add_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_ADD_CO_CI_U32)
|
||||
v_sub_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUB_CO_CI_U32)
|
||||
v_subrev_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUBREV_CO_CI_U32)
|
||||
@@ -1682,55 +1643,4 @@ image_gather4_c_cl = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4_C_CL)
|
||||
image_gather4_c_l = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4_C_L)
|
||||
image_gather4_c_b = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4_C_B)
|
||||
image_gather4_c_b_cl = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4_C_B_CL)
|
||||
image_gather4h = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4H)
|
||||
scratch_load_u8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_U8)
|
||||
scratch_load_i8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_I8)
|
||||
scratch_load_u16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_U16)
|
||||
scratch_load_i16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_I16)
|
||||
scratch_load_b32 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_B32)
|
||||
scratch_load_b64 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_B64)
|
||||
scratch_load_b96 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_B96)
|
||||
scratch_load_b128 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_B128)
|
||||
scratch_store_b8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B8)
|
||||
scratch_store_b16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B16)
|
||||
scratch_store_b32 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B32)
|
||||
scratch_store_b64 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B64)
|
||||
scratch_store_b96 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B96)
|
||||
scratch_store_b128 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_B128)
|
||||
scratch_load_d16_u8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_U8)
|
||||
scratch_load_d16_i8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_I8)
|
||||
scratch_load_d16_b16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_B16)
|
||||
scratch_load_d16_hi_u8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_HI_U8)
|
||||
scratch_load_d16_hi_i8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_HI_I8)
|
||||
scratch_load_d16_hi_b16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_D16_HI_B16)
|
||||
scratch_store_d16_hi_b8 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_D16_HI_B8)
|
||||
scratch_store_d16_hi_b16 = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_D16_HI_B16)
|
||||
scratch_load_block = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_LOAD_BLOCK)
|
||||
scratch_store_block = functools.partial(VSCRATCH, VSCRATCHOp.SCRATCH_STORE_BLOCK)
|
||||
|
||||
VCC_LO = SrcEnum.VCC_LO
|
||||
VCC_HI = SrcEnum.VCC_HI
|
||||
NULL = SrcEnum.NULL
|
||||
M0 = SrcEnum.M0
|
||||
EXEC_LO = SrcEnum.EXEC_LO
|
||||
EXEC_HI = SrcEnum.EXEC_HI
|
||||
ZERO = SrcEnum.ZERO
|
||||
DPP8FI = SrcEnum.DPP8FI
|
||||
SHARED_BASE = SrcEnum.SHARED_BASE
|
||||
SHARED_LIMIT = SrcEnum.SHARED_LIMIT
|
||||
PRIVATE_BASE = SrcEnum.PRIVATE_BASE
|
||||
PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
|
||||
POS_HALF = SrcEnum.POS_HALF
|
||||
NEG_HALF = SrcEnum.NEG_HALF
|
||||
POS_ONE = SrcEnum.POS_ONE
|
||||
NEG_ONE = SrcEnum.NEG_ONE
|
||||
POS_TWO = SrcEnum.POS_TWO
|
||||
NEG_TWO = SrcEnum.NEG_TWO
|
||||
POS_FOUR = SrcEnum.POS_FOUR
|
||||
NEG_FOUR = SrcEnum.NEG_FOUR
|
||||
INV_2PI = SrcEnum.INV_2PI
|
||||
VCCZ = SrcEnum.VCCZ
|
||||
EXECZ = SrcEnum.EXECZ
|
||||
SCC = SrcEnum.SCC
|
||||
LDS_DIRECT = SrcEnum.LDS_DIRECT
|
||||
OFF = NULL
|
||||
image_gather4h = functools.partial(VSAMPLE, VSAMPLEOp.IMAGE_GATHER4H)
|
||||
File diff suppressed because one or more lines are too long
@@ -9,6 +9,18 @@ from extra.assembly.amd.autogen.rdna3.enum import (VOP1Op, VOP2Op, VOP3Op, VOP3S
|
||||
SOPCOp, SOPKOp, SOPPOp, SMEMOp, DSOp, FLATOp, MUBUFOp, MTBUFOp, MIMGOp, VINTERPOp)
|
||||
from extra.assembly.amd.autogen.cdna.enum import VOP1Op as CDNA_VOP1Op, VOP2Op as CDNA_VOP2Op
|
||||
|
||||
# Source operand encoding - special registers and inline constants addressable as an instruction source.
# NOTE(review): the per-arch autogenerated enum this replaces listed M0 = 124 for CDNA; confirm that these
# values really are shared by every target before relying on them outside RDNA.
class SrcEnum(IntEnum):
  """Named source-operand selector values (integers, so they can be passed wherever a raw encoding is)."""
  # scalar special registers
  VCC_LO = 106
  VCC_HI = 107
  NULL = 124
  M0 = 125
  EXEC_LO = 126
  EXEC_HI = 127
  ZERO = 128
  # DPP8 selectors and aperture registers
  DPP8 = 233
  DPP8FI = 234
  SHARED_BASE = 235
  SHARED_LIMIT = 236
  PRIVATE_BASE = 237
  PRIVATE_LIMIT = 238
  # inline floating point constants
  POS_HALF = 240
  NEG_HALF = 241
  POS_ONE = 242
  NEG_ONE = 243
  POS_TWO = 244
  NEG_TWO = 245
  POS_FOUR = 246
  NEG_FOUR = 247
  INV_2PI = 248
  # remaining selectors
  DPP16 = 250
  VCCZ = 251
  EXECZ = 252
  SCC = 253
  LDS_DIRECT = 254

# Module-level shorthands so callers can write e.g. VCC_LO instead of SrcEnum.VCC_LO.
# DPP8 and DPP16 are intentionally not re-exported (the original tuple assignments omit them).
VCC_LO = SrcEnum.VCC_LO
VCC_HI = SrcEnum.VCC_HI
NULL = SrcEnum.NULL
M0 = SrcEnum.M0
EXEC_LO = SrcEnum.EXEC_LO
EXEC_HI = SrcEnum.EXEC_HI
ZERO = SrcEnum.ZERO
DPP8FI = SrcEnum.DPP8FI
SHARED_BASE = SrcEnum.SHARED_BASE
SHARED_LIMIT = SrcEnum.SHARED_LIMIT
PRIVATE_BASE = SrcEnum.PRIVATE_BASE
PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
POS_HALF = SrcEnum.POS_HALF
NEG_HALF = SrcEnum.NEG_HALF
POS_ONE = SrcEnum.POS_ONE
NEG_ONE = SrcEnum.NEG_ONE
POS_TWO = SrcEnum.POS_TWO
NEG_TWO = SrcEnum.NEG_TWO
POS_FOUR = SrcEnum.POS_FOUR
NEG_FOUR = SrcEnum.NEG_FOUR
INV_2PI = SrcEnum.INV_2PI
VCCZ = SrcEnum.VCCZ
EXECZ = SrcEnum.EXECZ
SCC = SrcEnum.SCC
LDS_DIRECT = SrcEnum.LDS_DIRECT
# OFF is an alternate spelling of NULL
OFF = NULL
|
||||
|
||||
# Common masks and bit conversion functions
|
||||
MASK32, MASK64, MASK128 = 0xffffffff, 0xffffffffffffffff, (1 << 128) - 1
|
||||
_struct_f, _struct_I = struct.Struct("<f"), struct.Struct("<I")
|
||||
@@ -288,7 +300,16 @@ class Inst:
|
||||
|
||||
def __init_subclass__(cls, **kwargs):
  """Collect the bit-field layout of a newly defined instruction class and derive its size.

  Sets on ``cls``:
    _fields:   name -> BitField, inherited fields first, then the class's own (which override).
    _sz:       base encoding size in bytes (4, 8 or 12), from the highest bit used by a field.
    _encoding: the class's own ``encoding`` attribute, only when it is declared as a tuple.
  """
  super().__init_subclass__(**kwargs)
  # Merge fields from parent classes, walking the MRO base-first so more derived classes win.
  # Inst itself and anything without a _fields mapping contribute nothing.
  cls._fields = {}
  for base in reversed(cls.__mro__):
    if base is Inst or not hasattr(base, '_fields'): continue
    cls._fields.update(base._fields)
  # Add this class's own fields (overrides parents); a 2-tuple whose first item is a BitField contributes that BitField
  cls._fields.update({n: v[0] if isinstance(v, tuple) else v for n, v in cls.__dict__.items() if isinstance(v, BitField) or (isinstance(v, tuple) and len(v) == 2 and isinstance(v[0], BitField))})
  # Compute size from max bit (exclude optional fields starting at bit 64+, e.g. MIMG NSA)
  # default=0 already covers a class with no fields, so no separate emptiness check is needed
  max_bit = max((bf.hi for bf in cls._fields.values() if bf.lo < 64), default=0)
  cls._sz = 12 if max_bit > 63 else 8 if max_bit > 31 else 4
  if 'encoding' in cls._fields and isinstance(cls.__dict__.get('encoding'), tuple): cls._encoding = cls.__dict__['encoding']
|
||||
|
||||
def _or_field(self, name: str, bit: int):
|
||||
@@ -352,6 +373,16 @@ class Inst:
|
||||
field_names = [n for n in self._fields if n != 'encoding']
|
||||
# Map Python-friendly names to actual field names (abs_ -> abs for Python reserved word)
|
||||
if 'abs_' in kwargs: kwargs['abs'] = kwargs.pop('abs_')
|
||||
# If more args than fields, treat extra arg as literal (for FMAAK/FMAMK style instructions)
|
||||
# FMAMK has K in middle (vdst, src0, K, vsrc1), FMAAK has K at end (vdst, src0, vsrc1, K)
|
||||
args = list(args)
|
||||
if len(args) > len(field_names) and literal is None:
|
||||
for i, a in enumerate(args):
|
||||
if isinstance(a, int) and not isinstance(a, SrcEnum) and i < len(field_names) and field_names[i] in ('vsrc1',):
|
||||
literal = args.pop(i)
|
||||
break
|
||||
else:
|
||||
literal = args.pop() # fallback: last arg is literal
|
||||
orig_args = dict(zip(field_names, args)) | kwargs
|
||||
self._values.update(orig_args)
|
||||
self._precompute()
|
||||
@@ -450,7 +481,7 @@ class Inst:
|
||||
return result + (lit32 & MASK32).to_bytes(4, 'little')
|
||||
|
||||
@classmethod
def _size(cls) -> int:
  """Return the base encoding size in bytes, as recorded on the class in ``_sz``."""
  return cls._sz

def size(self) -> int:
  """Return the encoded size in bytes, including the trailing literal when one is attached."""
  # Literal is always 4 bytes in the binary (for 64-bit ops, it's in high 32 bits)
  return self._size() + (4 if self._literal is not None else 0)
|
||||
@@ -583,6 +614,4 @@ class Inst:
|
||||
def is_64bit(self) -> bool: return spec_is_64bit(self.op_name)
|
||||
def is_dst_16(self) -> bool: return self._spec_regs[0] == 1 and is_dtype_16(self._spec_dtype[0])
|
||||
|
||||
class Inst32(Inst): pass
|
||||
class Inst64(Inst): pass
|
||||
class Inst96(Inst): pass
|
||||
|
||||
|
||||
@@ -7,8 +7,9 @@ from extra.assembly.amd.dsl import Inst, unwrap, FLOAT_ENC, MASK32, MASK64, _f32
|
||||
from extra.assembly.amd.asm import detect_format
|
||||
from extra.assembly.amd.pcode import compile_pseudocode
|
||||
from extra.assembly.amd.autogen.rdna3.str_pcode import PSEUDOCODE_STRINGS
|
||||
from extra.assembly.amd.dsl import SrcEnum
|
||||
from extra.assembly.amd.autogen.rdna3.ins import (SOP1, SOP2, SOPC, SOPK, SOPP, SMEM, VOP1, VOP2, VOP3, VOP3SD, VOP3P, VOPC, DS, FLAT, VOPD,
|
||||
SrcEnum, SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, SMEMOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp, DSOp, FLATOp, GLOBALOp, SCRATCHOp, VOPDOp)
|
||||
SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, SMEMOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp, DSOp, FLATOp, GLOBALOp, SCRATCHOp, VOPDOp)
|
||||
|
||||
WAVE_SIZE, SGPR_COUNT, VGPR_COUNT = 32, 128, 256
|
||||
VCC_LO, VCC_HI, NULL, EXEC_LO, EXEC_HI, SCC = SrcEnum.VCC_LO, SrcEnum.VCC_HI, SrcEnum.NULL, SrcEnum.EXEC_LO, SrcEnum.EXEC_HI, SrcEnum.SCC
|
||||
|
||||
@@ -447,6 +447,27 @@ TWO_OVER_PI_1201 = Reg(0x0145f306dc9c882a53f84eafa3ea69bb81b6c52b3278872083fca2c
|
||||
# COMPILER: pseudocode -> Python (minimal transforms)
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
# Substrings whose presence marks a line as instruction pseudocode rather than surrounding PDF text.
_PCODE_FILTER_MARKERS = ('D0.', 'D1.', 'S0.', 'S1.', 'S2.', 'SCC =', 'SCC ?', 'VCC', 'EXEC', 'tmp =', 'tmp[', 'lane =', 'PC =',
                         'D0[', 'D1[', 'S0[', 'S1[', 'S2[', 'MEM[', 'RETURN_DATA', 'VADDR', 'VDATA', 'VDST', 'SADDR', 'OFFSET')
# Statement keywords (and the comment marker) that a pseudocode line may start with.
_PCODE_FILTER_PREFIXES = ('if ', 'else', 'elsif', 'endif', 'declare ', 'for ', 'endfor', '//')
# Upper-case call at the start of a line, e.g. "NAME(...)": treated as a worked example, not code.
_PCODE_FILTER_EXAMPLE = re.compile(r'^[A-Z_]+\(')
# Lower-case identifier that is assigned to or indexed at the start of a line.
_PCODE_FILTER_ASSIGN = re.compile(r'^[a-z_]+(?:\s*=|\[)')

def _filter_pseudocode(pseudocode: str) -> str:
  """Filter raw PDF pseudocode to only include actual code lines.

  Every line is stripped. Blank lines, example lines (containing ``=>`` or starting with an
  upper-case call) and lambda definitions (from the ``= lambda(`` line through the line ending in
  ``);``) are dropped. Of what remains, a line is kept when it contains a known operand marker,
  starts with a statement keyword or ``//``, starts with a lower-case assignment/index, or contains
  ``=`` while inside an ``if`` ... ``endif`` region.

  Args:
    pseudocode: raw pseudocode text, one statement per line.

  Returns:
    The kept lines (stripped), joined with newlines; empty string when nothing is kept.
  """
  kept, in_lambda, depth = [], 0, 0
  for line in pseudocode.split('\n'):
    s = line.strip()
    if not s: continue
    if '=>' in s or _PCODE_FILTER_EXAMPLE.match(s): continue  # Skip example lines
    if '= lambda(' in s:
      # Skip lambda definitions. A lambda closed on this same line has no body to skip, so it must not
      # open a skip region - otherwise every following line is discarded until an unrelated ');'.
      if not s.endswith(');'): in_lambda += 1
      continue
    if in_lambda > 0:
      if s.endswith(');'): in_lambda -= 1
      continue
    # Only include lines that look like pseudocode (evaluated before depth is updated for this line)
    is_code = (any(marker in s for marker in _PCODE_FILTER_MARKERS) or s.startswith(_PCODE_FILTER_PREFIXES) or
               _PCODE_FILTER_ASSIGN.match(s) is not None or (depth > 0 and '=' in s))
    if s.startswith('if '): depth += 1
    elif s.startswith('endif'): depth = max(0, depth - 1)
    if is_code: kept.append(s)
  return '\n'.join(kept)
|
||||
|
||||
def _compile_pseudocode(pseudocode: str) -> str:
|
||||
"""Compile pseudocode to Python. Transforms are minimal - most syntax just works."""
|
||||
pseudocode = re.sub(r'\bpass\b', 'pass_', pseudocode) # 'pass' is Python keyword
|
||||
@@ -756,9 +777,10 @@ _PCODE_GLOBALS = {
|
||||
@functools.cache
|
||||
def compile_pseudocode(cls_name: str, op_name: str, pseudocode: str):
|
||||
"""Compile pseudocode string to executable function. Cached for performance."""
|
||||
code = _compile_pseudocode(pseudocode)
|
||||
filtered = _filter_pseudocode(pseudocode)
|
||||
code = _compile_pseudocode(filtered)
|
||||
code = _apply_pseudocode_fixes(op_name, code)
|
||||
fn_code = _generate_function(cls_name, op_name, pseudocode, code)
|
||||
fn_code = _generate_function(cls_name, op_name, filtered, code)
|
||||
fn_name = f"_{cls_name}_{op_name}"
|
||||
local_ns = {}
|
||||
exec(fn_code, _PCODE_GLOBALS, local_ns)
|
||||
|
||||
@@ -1,457 +1,305 @@
|
||||
# Generate AMD ISA autogen files from PDF documentation
|
||||
# Combines format/enum generation (previously in dsl.py) and pseudocode compilation (previously in pcode.py)
|
||||
# Usage: python -m extra.assembly.amd.pdf [--arch rdna3|rdna4|cdna|all]
|
||||
import re, functools
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
# Generic PDF text extractor - no external dependencies
|
||||
import re, zlib
|
||||
from tinygrad.helpers import fetch, merge_dicts
|
||||
|
||||
PDF_URLS = {
|
||||
"rdna3": "https://docs.amd.com/api/khub/documents/UVVZM22UN7tMUeiW_4ShTQ/content",
|
||||
"rdna4": "https://docs.amd.com/api/khub/documents/uQpkEvk3pv~kfAb2x~j4uw/content",
|
||||
"cdna": ["https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf",
|
||||
"https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf"],
|
||||
"cdna": "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf",
|
||||
}
|
||||
|
||||
# Field type mappings and ordering
|
||||
# Maps an upper-cased field name from a "Fields" table to the DSL type name used when annotating it.
# Names that are absent get no type annotation.
FIELD_TYPES = {
  # scalar sources
  'SSRC0': 'SSrc', 'SSRC1': 'SSrc', 'SOFFSET': 'SSrc', 'SADDR': 'SSrc',
  # generic sources
  'SRC0': 'Src', 'SRC1': 'Src', 'SRC2': 'Src',
  # scalar register fields
  'SDST': 'SGPRField', 'SBASE': 'SGPRField', 'SDATA': 'SGPRField', 'SRSRC': 'SGPRField',
  # vector register fields
  'VDST': 'VGPRField', 'VSRC1': 'VGPRField', 'VDATA': 'VGPRField', 'VADDR': 'VGPRField',
  'ADDR': 'VGPRField', 'DATA': 'VGPRField', 'DATA0': 'VGPRField', 'DATA1': 'VGPRField',
  # immediates
  'SIMM16': 'SImm', 'OFFSET': 'Imm',
  # VOPD (dual-issue) fields
  'OPX': 'VOPDOp', 'OPY': 'VOPDOp',
  'SRCX0': 'Src', 'SRCY0': 'Src',
  'VSRCX1': 'VGPRField', 'VSRCY1': 'VGPRField', 'VDSTX': 'VGPRField', 'VDSTY': 'VDSTYEnc',
}

# Preferred order (lower-case field names) in which each format's fields are emitted.
# Fields that are not listed sort after the listed ones.
FIELD_ORDER = {
  'SOP2': ['op', 'sdst', 'ssrc0', 'ssrc1'],
  'SOP1': ['op', 'sdst', 'ssrc0'],
  'SOPC': ['op', 'ssrc0', 'ssrc1'],
  'SOPK': ['op', 'sdst', 'simm16'],
  'SOPP': ['op', 'simm16'],
  'VOP1': ['op', 'vdst', 'src0'],
  'VOPC': ['op', 'src0', 'vsrc1'],
  'VOP2': ['op', 'vdst', 'src0', 'vsrc1'],
  'VOP3SD': ['op', 'vdst', 'sdst', 'src0', 'src1', 'src2', 'clmp'],
  'SMEM': ['op', 'sdata', 'sbase', 'soffset', 'offset', 'glc', 'dlc'],
  'DS': ['op', 'vdst', 'addr', 'data0', 'data1'],
  'VOP3': ['op', 'vdst', 'src0', 'src1', 'src2', 'omod', 'neg', 'abs', 'clmp', 'opsel'],
  'VOP3P': ['op', 'vdst', 'src0', 'src1', 'src2', 'neg', 'neg_hi', 'opsel', 'opsel_hi', 'clmp'],
  'FLAT': ['op', 'vdst', 'addr', 'data', 'saddr', 'offset', 'seg', 'dlc', 'glc', 'slc'],
  'MUBUF': ['op', 'vdata', 'vaddr', 'srsrc', 'soffset', 'offset', 'offen', 'idxen', 'glc', 'dlc', 'slc', 'tfe'],
  'MTBUF': ['op', 'vdata', 'vaddr', 'srsrc', 'soffset', 'offset', 'format', 'offen', 'idxen', 'glc', 'dlc', 'slc', 'tfe'],
  'MIMG': ['op', 'vdata', 'vaddr', 'srsrc', 'ssamp', 'dmask', 'dim', 'unrm', 'dlc', 'glc', 'slc'],
  'EXP': ['en', 'target', 'vsrc0', 'vsrc1', 'vsrc2', 'vsrc3', 'done', 'row'],
  'VINTERP': ['op', 'vdst', 'src0', 'src1', 'src2', 'waitexp', 'clmp', 'opsel', 'neg'],
  'VOPD': ['opx', 'opy', 'vdstx', 'vdsty', 'srcx0', 'vsrcx1', 'srcy0', 'vsrcy1'],
  'LDSDIR': ['op', 'vdst', 'attr', 'attr_chan', 'wait_va'],
}

# Source-operand encodings that are always present in the source enum, whatever the PDF table yields.
SRC_EXTRAS = {
  233: 'DPP8',
  234: 'DPP8FI',
  250: 'DPP16',
  251: 'VCCZ',
  252: 'EXECZ',
  254: 'LDS_DIRECT',
}

# Literal constants as spelled in the source-operand table -> enum member name.
FLOAT_MAP = {
  '0.5': 'POS_HALF', '-0.5': 'NEG_HALF',
  '1.0': 'POS_ONE', '-1.0': 'NEG_ONE',
  '2.0': 'POS_TWO', '-2.0': 'NEG_TWO',
  '4.0': 'POS_FOUR', '-4.0': 'NEG_FOUR',
  '1/(2*PI)': 'INV_2PI',
  '0': 'ZERO',
}

# Matches a whole line of the form "<INSTRUCTION_NAME> <opcode>"; group 1 is the name, group 2 the decimal opcode.
INST_PATTERN = re.compile(
  r'^([SVD]S?_[A-Z0-9_]+|(?:FLAT|GLOBAL|SCRATCH)_[A-Z0-9_]+)\s+(\d+)\s*$',
  re.MULTILINE,
)
|
||||
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# PDF PARSING WITH PAGE CACHING
|
||||
# Generic PDF extraction tools
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
class CachedPDF:
  """Memoizing wrapper around an opened PDF document.

  The wrapped object must expose `pages`, a sequence whose items provide `extract_text()` and
  `find_tables()` (each found table providing `extract()`). Results are stored per page index so
  repeated lookups of the same page do not re-run extraction.
  """
  def __init__(self, pdf):
    self._pdf = pdf
    self._text_cache = {}
    self._table_cache = {}

  def __len__(self):
    """Number of pages in the wrapped document."""
    return len(self._pdf.pages)

  def text(self, i):
    """Return the extracted text of page `i` ('' when extraction yields nothing)."""
    try:
      return self._text_cache[i]
    except KeyError:
      extracted = self._pdf.pages[i].extract_text()
      self._text_cache[i] = extracted or ''
      return self._text_cache[i]

  def tables(self, i):
    """Return the list of extracted tables found on page `i`."""
    try:
      return self._table_cache[i]
    except KeyError:
      found = self._pdf.pages[i].find_tables()
      self._table_cache[i] = [tbl.extract() for tbl in found]
      return self._table_cache[i]
|
||||
def extract(url: str) -> list[list[tuple[float, float, str, str]]]:
|
||||
"""Extract positioned text from PDF. Returns list of text elements (x, y, text, font) per page."""
|
||||
data = fetch(url).read_bytes()
|
||||
|
||||
def _parse_bits(s: str) -> tuple[int, int] | None:
|
||||
return (int(m.group(1)), int(m.group(2) or m.group(1))) if (m := re.match(r'\[(\d+)(?::(\d+))?\]', s)) else None
|
||||
# Parse xref table to locate objects
|
||||
xref: dict[int, int] = {}
|
||||
pos = int(re.search(rb'startxref\s+(\d+)', data).group(1)) + 4
|
||||
while data[pos:pos+7] != b'trailer':
|
||||
while data[pos:pos+1] in b' \r\n': pos += 1
|
||||
line_end = data.find(b'\n', pos)
|
||||
start_obj, count = map(int, data[pos:line_end].split()[:2])
|
||||
pos = line_end + 1
|
||||
for i in range(count):
|
||||
if data[pos+17:pos+18] == b'n' and (off := int(data[pos:pos+10])) > 0: xref[start_obj + i] = off
|
||||
pos += 20
|
||||
|
||||
def _parse_fields_table(table: list, fmt: str, enums: set[str]) -> list[tuple]:
  """Turn a 'Field Name / Bits / Description' table into field tuples.

  The first row of `table` is treated as the header and skipped. Rows without a name or without a
  parsable bit range are ignored. Each result is (name, hi, lo, enc_val, ftype), where enc_val is
  the integer encoding constant (only for an ENCODING row that carries one) and ftype is either
  f"{fmt}Op" (for the OP field when that enum exists in `enums`) or the FIELD_TYPES entry.
  """
  parsed = []
  op_enum = f"{fmt}Op"
  for row in table[1:]:
    if not row or not row[0]: continue
    # only the first line of a multi-line cell carries the name / bit range
    name = row[0].split('\n')[0].strip()
    bits_str = (row[1] or '').split('\n')[0].strip()
    bits = _parse_bits(bits_str)
    if not bits: continue
    hi, lo = bits[0], bits[1]
    enc_val = None
    if name == 'ENCODING' and row[2]:
      desc = row[2]
      # Shared FLAT/GLOBAL/SCRATCH table: prefer the format-specific encoding (VFLAT -> Flat, VGLOBAL -> Global)
      fmt_key = fmt.lstrip('V').lower().capitalize()
      m = re.search(rf"{fmt_key}='b([01_]+)", desc)
      if not m: m = re.search(r"(?:'b|Must be:\s*)([01_]+)", desc)
      enc_bits = m.group(1).replace('_', '') if m else None
      if enc_bits:
        enc_val = int(enc_bits, 2)
        # widen the field downwards when the literal has more bits than the declared range
        if len(enc_bits) > hi - lo + 1: lo = hi - len(enc_bits) + 1
    if name == 'OP' and op_enum in enums: ftype = op_enum
    else: ftype = FIELD_TYPES.get(name.upper())
    parsed.append((name, hi, lo, enc_val, ftype))
  return parsed
|
||||
def get_stream(n: int) -> bytes:
  """Return the payload of stream object `n`, inflated when the object mentions /FlateDecode.

  Reads `data` (the raw PDF bytes) and `xref` (object number -> byte offset) from the enclosing scope.
  Raises ValueError when the object contains no `stream` keyword.
  """
  obj = data[xref[n]:data.find(b'endobj', xref[n])]
  # The `stream` keyword may be followed by LF or by CRLF; a fixed `find(b'stream\n') + 7` would
  # silently slice from a bogus offset in the CRLF case.
  if (kw := re.search(rb'stream\r?\n', obj)) is None: raise ValueError(f"PDF object {n} has no stream")
  raw = obj[kw.end():obj.find(b'\nendstream')]
  return zlib.decompress(raw) if b'/FlateDecode' in obj else raw
|
||||
|
||||
def _parse_single_pdf(url: str):
|
||||
"""Parse a single PDF and return (formats, enums, src_enum, doc_name, instructions)."""
|
||||
import pdfplumber
|
||||
from tinygrad.helpers import fetch
|
||||
# Find page content streams and extract text
|
||||
pages = []
|
||||
for n in sorted(xref):
|
||||
if b'/Type /Page' not in data[xref[n]:xref[n]+500]: continue
|
||||
if not (m := re.search(rb'/Contents (\d+) 0 R', data[xref[n]:xref[n]+500])): continue
|
||||
stream = get_stream(int(m.group(1))).decode('latin-1')
|
||||
elements, font = [], ''
|
||||
for bt in re.finditer(r'BT(.*?)ET', stream, re.S):
|
||||
x, y = 0.0, 0.0
|
||||
for m in re.finditer(r'(/F[\d.]+) [\d.]+ Tf|([\d.+-]+) ([\d.+-]+) Td|[\d.+-]+ [\d.+-]+ [\d.+-]+ [\d.+-]+ ([\d.+-]+) ([\d.+-]+) Tm|<([0-9A-Fa-f]+)>.*?Tj|\[([^\]]+)\] TJ', bt.group(1)):
|
||||
if m.group(1): font = m.group(1)
|
||||
elif m.group(2): x, y = x + float(m.group(2)), y + float(m.group(3))
|
||||
elif m.group(4): x, y = float(m.group(4)), float(m.group(5))
|
||||
elif m.group(6) and (t := bytes.fromhex(m.group(6)).decode('latin-1')).strip(): elements.append((x, y, t, font))
|
||||
elif m.group(7) and (t := ''.join(bytes.fromhex(h).decode('latin-1') for h in re.findall(r'<([0-9A-Fa-f]+)>', m.group(7)))).strip(): elements.append((x, y, t, font))
|
||||
pages.append(sorted(elements, key=lambda e: (-e[1], e[0])))
|
||||
return pages
|
||||
|
||||
pdf = CachedPDF(pdfplumber.open(fetch(url)))
|
||||
total_pages = len(pdf)
|
||||
def extract_tables(pages: list[list[tuple[float, float, str, str]]]) -> dict[int, tuple[str, list[list[str]]]]:
|
||||
"""Extract numbered tables from PDF pages. Returns {table_num: (title, rows)} where rows is list of cells per row."""
|
||||
def group_by_y(texts, key=lambda y: round(y)):
|
||||
by_y: dict[int, list[tuple[float, float, str]]] = {}
|
||||
for x, y, t, _ in texts:
|
||||
by_y.setdefault(key(y), []).append((x, y, t))
|
||||
return by_y
|
||||
|
||||
# Auto-detect document type
|
||||
first_page = pdf.text(0)
|
||||
is_cdna4, is_cdna3 = 'CDNA4' in first_page or 'CDNA 4' in first_page, 'CDNA3' in first_page or 'MI300' in first_page
|
||||
is_cdna, is_rdna4 = is_cdna3 or is_cdna4, 'RDNA4' in first_page or 'RDNA 4' in first_page
|
||||
is_rdna35, is_rdna3 = 'RDNA3.5' in first_page or 'RDNA 3.5' in first_page, 'RDNA3' in first_page and 'RDNA3.5' not in first_page
|
||||
doc_name = "CDNA4" if is_cdna4 else "CDNA3" if is_cdna3 else "RDNA4" if is_rdna4 else "RDNA3.5" if is_rdna35 else "RDNA3" if is_rdna3 else "Unknown"
|
||||
# Find all table headers by merging text on same line
|
||||
table_positions = []
|
||||
for page_idx, texts in enumerate(pages):
|
||||
for items in group_by_y(texts).values():
|
||||
line = ''.join(t for _, t in sorted((x, t) for x, _, t in items))
|
||||
if m := re.search(r'Table (\d+)\. (.+)', line):
|
||||
table_positions.append((int(m.group(1)), m.group(2).strip(), page_idx, items[0][1]))
|
||||
table_positions.sort(key=lambda t: (t[2], -t[3]))
|
||||
|
||||
# Find Microcode Formats section (for formats/enums)
|
||||
microcode_start = next((i for i in range(int(total_pages * 0.2), total_pages)
|
||||
if re.search(r'\d+\.\d+\.\d+\.\s+SOP2\b|Chapter \d+\.\s+Microcode Formats', pdf.text(i))), int(total_pages * 0.9))
|
||||
# Find Instructions section (for pseudocode)
|
||||
instr_start = next((i for i in range(int(total_pages * 0.1), int(total_pages * 0.5))
|
||||
if re.search(r'Chapter \d+\.\s+Instructions\b', pdf.text(i))), total_pages // 3)
|
||||
instr_end = next((i for start in [int(total_pages * 0.6), int(total_pages * 0.5), instr_start]
|
||||
for i in range(start, min(start + 100, total_pages))
|
||||
if re.search(r'Chapter \d+\.\s+Microcode Formats', pdf.text(i))), total_pages)
|
||||
|
||||
# Parse src enum from SSRC encoding table
|
||||
src_enum = dict(SRC_EXTRAS)
|
||||
for i in range(microcode_start, min(microcode_start + 10, total_pages)):
|
||||
text = pdf.text(i)
|
||||
if 'SSRC0' in text and 'VCC_LO' in text:
|
||||
for m in re.finditer(r'^(\d+)\s+(\S+)', text, re.M):
|
||||
val, name = int(m.group(1)), m.group(2).rstrip('.:')
|
||||
if name in FLOAT_MAP: src_enum[val] = FLOAT_MAP[name]
|
||||
elif re.match(r'^[A-Z][A-Z0-9_]*$', name): src_enum[val] = name
|
||||
# For each table, find rows with matching X positions
|
||||
result: dict[int, tuple[str, list[list[str]]]] = {}
|
||||
for num, title, start_page, header_y in table_positions:
|
||||
rows, col_xs = [], None
|
||||
for page_idx in range(start_page, len(pages)):
|
||||
page_texts = [(x, y, t) for x, y, t, _ in pages[page_idx] if 30 < y < 760 and (page_idx > start_page or y < header_y)]
|
||||
for items in sorted(group_by_y([(x, y, t, '') for x, y, t in page_texts], key=lambda y: round(y / 5)).values(), key=lambda items: -items[0][1]):
|
||||
xs = tuple(sorted(round(x) for x, _, _ in items))
|
||||
if col_xs is None:
|
||||
if len(xs) < 2: continue # Skip single-column rows before table starts
|
||||
col_xs = xs
|
||||
elif len(xs) == 1 and xs[0] in col_xs: continue # Skip continuation rows at known column positions
|
||||
elif not any(c in xs for c in col_xs[:2]): break # Row missing first columns = end of table
|
||||
rows.append([t for _, t in sorted((x, t) for x, _, t in items)])
|
||||
else: continue
|
||||
break
|
||||
if rows: result[num] = (title, rows)
|
||||
return result
|
||||
|
||||
# Parse opcode tables
|
||||
full_text = '\n'.join(pdf.text(i) for i in range(microcode_start, min(microcode_start + 50, total_pages)))
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# AMD specific extraction
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def extract_enums(tables: dict[int, tuple[str, list[list[str]]]]) -> dict[str, dict[int, str]]:
|
||||
"""Extract all enums from tables. Returns {enum_name: {value: name}}."""
|
||||
enums: dict[str, dict[int, str]] = {}
|
||||
for m in re.finditer(r'Table \d+\. (\w+) Opcodes(.*?)(?=Table \d+\.|\n\d+\.\d+\.\d+\.\s+\w+\s*\nDescription|$)', full_text, re.S):
|
||||
if ops := {int(x.group(1)): x.group(2) for x in re.finditer(r'(\d+)\s+([A-Z][A-Z0-9_]+)', m.group(2))}:
|
||||
enums[m.group(1) + "Op"] = ops
|
||||
if vopd_m := re.search(r'Table \d+\. VOPD Y-Opcodes\n(.*?)(?=Table \d+\.|15\.\d)', full_text, re.S):
|
||||
if ops := {int(x.group(1)): x.group(2) for x in re.finditer(r'(\d+)\s+(V_DUAL_\w+)', vopd_m.group(1))}:
|
||||
enums["VOPDOp"] = ops
|
||||
enum_names = set(enums.keys())
|
||||
for num, (title, rows) in tables.items():
|
||||
# Opcode enums from "XXX Opcodes" tables
|
||||
if m := re.match(r'(\w+) (?:Y-)?Opcodes', title):
|
||||
fmt_name = 'VOPD' if 'Y-Opcodes' in title else m.group(1)
|
||||
ops: dict[int, str] = {}
|
||||
for row in rows:
|
||||
for i in range(0, len(row) - 1, 2):
|
||||
if row[i].isdigit() and re.match(r'^[A-Z][A-Z0-9_]+$', row[i + 1]):
|
||||
ops[int(row[i])] = row[i + 1]
|
||||
if ops: enums[fmt_name] = ops
|
||||
# BufFmt from "Data Format" tables
|
||||
if 'Data Format' in title:
|
||||
for row in rows:
|
||||
for i in range(0, len(row) - 1, 2):
|
||||
if row[i].isdigit() and re.match(r'^[\dA-Z_]+$', row[i + 1]) and 'INVALID' not in row[i + 1]:
|
||||
enums.setdefault('BufFmt', {})[int(row[i])] = row[i + 1]
|
||||
return enums
|
||||
|
||||
# Parse instruction formats
|
||||
def is_fields_table(t):
  """Truthy when `t` has more than one row and its top-left cell contains 'Field'."""
  if not t: return t
  if not len(t) > 1: return False
  header = t[0]
  if not header: return header
  return 'Field' in str(header[0] or '')
|
||||
def has_encoding(fields):
  """True when any field tuple in `fields` is named 'ENCODING'."""
  for f in fields:
    if f[0] == 'ENCODING': return True
  return False
|
||||
def has_header_before_fields(text):
  """True when a numbered 'N.N.N. NAME' heading line occurs before the first 'Field Name' in `text`."""
  pos = text.find('Field Name')
  if pos == -1: return False
  return re.search(r'\d+\.\d+\.\d+\.\s+\w+\s*\n', text[:pos]) is not None
|
||||
def extract_ins(tables: dict[int, tuple[str, list[list[str]]]]) -> tuple[dict[str, list[tuple[str, int, int]]], dict[str, str]]:
  """Extract formats and encodings from 'XXX Fields' tables. Returns (formats, encodings).

  `tables` maps table number -> (title, rows). Only tables whose title is '<NAME> Fields' are used.
  formats[NAME] is a list of (lower-cased field name, hi bit, lo bit) for every row whose second
  cell is a bit range ('[hi:lo]', 'hi:lo' or '[bit]'). encodings[NAME] is the binary-digit string
  of the ENCODING row (underscores removed), when that row's third cell carries one.
  """
  formats: dict[str, list[tuple[str, int, int]]] = {}
  encodings: dict[str, str] = {}
  for title, rows in tables.values():
    if not (m := re.match(r'(\w+) Fields$', title)): continue
    fmt_name = m.group(1)
    fields = []
    for row in rows:
      if len(row) < 2: continue
      bits = re.match(r'\[?(\d+):(\d+)\]?$', row[1]) or re.match(r'\[(\d+)\]$', row[1])
      if not bits: continue
      field_name = row[0].lower()
      hi = int(bits.group(1))
      lo = int(bits.group(2)) if bits.lastindex >= 2 else hi
      if field_name == 'encoding' and len(row) >= 3:
        enc_bits = None
        # Take only the binary digits of the last 'b literal: text that follows the literal must not
        # leak into the encoding string or inflate its width.
        if (literals := re.findall(r"'b([01_]+)", row[2])): enc_bits = literals[-1].replace('_', '')
        elif (enc := re.search(r':\s*([01_]+)', row[2])): enc_bits = enc.group(1).replace('_', '')
        if enc_bits:
          # If encoding bits exceed field width, extend field to match (AMD docs sometimes have this)
          if len(enc_bits) > hi - lo + 1: lo = hi - len(enc_bits) + 1
          encodings[fmt_name] = enc_bits
      fields.append((field_name, hi, lo))
    if fields: formats[fmt_name] = fields
  return formats, encodings
|
||||
|
||||
format_headers = []
|
||||
for i in range(50):
|
||||
if microcode_start + i >= total_pages: break
|
||||
text = pdf.text(microcode_start + i)
|
||||
for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n?Description', text): format_headers.append((m.group(1), i, m.start()))
|
||||
for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n', text):
|
||||
fmt_name = m.group(1)
|
||||
if is_cdna and fmt_name.isupper() and len(fmt_name) >= 2: format_headers.append((fmt_name, i, m.start()))
|
||||
elif m.start() > len(text) - 200 and 'Description' not in text[m.end():] and i + 1 < 50:
|
||||
next_text = pdf.text(microcode_start + i + 1).lstrip()
|
||||
if next_text.startswith('Description') or (next_text.startswith('"RDNA') and 'Description' in next_text[:200]):
|
||||
format_headers.append((fmt_name, i, m.start()))
|
||||
# RDNA4: Look for "Table X. Y Fields" patterns (e.g., VIMAGE, VSAMPLE, or shared FLAT/GLOBAL/SCRATCH)
|
||||
for m in re.finditer(r'Table \d+\.\s+([\w,\s]+?)\s+Fields', text):
|
||||
table_name = m.group(1).strip()
|
||||
# Handle shared table like "FLAT, GLOBAL and SCRATCH"
|
||||
if ',' in table_name or ' and ' in table_name:
|
||||
for part in re.split(r',\s*|\s+and\s+', table_name):
|
||||
fmt_name = 'V' + part.strip()
|
||||
if fmt_name not in [h[0] for h in format_headers]: format_headers.append((fmt_name, i, m.start()))
|
||||
elif table_name.startswith('V'):
|
||||
if table_name not in [h[0] for h in format_headers]: format_headers.append((table_name, i, m.start()))
|
||||
def extract_pcode(pages: list[list[tuple[float, float, str, str]]], enums: dict[str, dict[int, str]]) -> dict[tuple[str, int], str]:
|
||||
"""Extract pseudocode for instructions. Returns {(name, opcode): pseudocode}."""
|
||||
# Build lookup from instruction name to opcode
|
||||
name_to_op = {name: op for ops in enums.values() for op, name in ops.items()}
|
||||
|
||||
formats: dict[str, list] = {}
|
||||
for fmt_name, rel_idx, header_pos in format_headers:
|
||||
if fmt_name in formats: continue
|
||||
page_idx = microcode_start + rel_idx
|
||||
text = pdf.text(page_idx)
|
||||
field_pos = text.find('Field Name', header_pos)
|
||||
fields = None
|
||||
for offset in range(3):
|
||||
if page_idx + offset >= total_pages: break
|
||||
if offset > 0 and has_header_before_fields(pdf.text(page_idx + offset)): break
|
||||
for t in pdf.tables(page_idx + offset) if offset > 0 or field_pos > header_pos else []:
|
||||
if is_fields_table(t) and (f := _parse_fields_table(t, fmt_name, enum_names)) and has_encoding(f): fields = f; break
|
||||
if fields: break
|
||||
if not fields and field_pos > header_pos:
|
||||
for t in pdf.tables(page_idx):
|
||||
if is_fields_table(t) and (f := _parse_fields_table(t, fmt_name, enum_names)): fields = f; break
|
||||
if not fields: continue
|
||||
field_names = {f[0] for f in fields}
|
||||
for pg_offset in range(1, 3):
|
||||
if page_idx + pg_offset >= total_pages or has_header_before_fields(pdf.text(page_idx + pg_offset)): break
|
||||
for t in pdf.tables(page_idx + pg_offset):
|
||||
if is_fields_table(t) and (extra := _parse_fields_table(t, fmt_name, enum_names)) and not has_encoding(extra):
|
||||
for ef in extra:
|
||||
if ef[0] not in field_names: fields.append(ef); field_names.add(ef[0])
|
||||
break
|
||||
formats[fmt_name] = fields
|
||||
# First pass: find all instruction headers across all pages
|
||||
all_instructions: list[tuple[int, float, str, int]] = [] # (page_idx, y, name, opcode)
|
||||
for page_idx, page in enumerate(pages):
|
||||
by_y: dict[int, list[tuple[float, str]]] = {}
|
||||
for x, y, t, _ in page:
|
||||
by_y.setdefault(round(y), []).append((x, t))
|
||||
for y, items in sorted(by_y.items(), reverse=True):
|
||||
left = [(x, t) for x, t in items if 55 < x < 65]
|
||||
right = [(x, t) for x, t in items if 535 < x < 550]
|
||||
if left and right and left[0][1] in name_to_op and right[0][1].isdigit():
|
||||
all_instructions.append((page_idx, y, left[0][1], int(right[0][1])))
|
||||
|
||||
# Fix known PDF errors (RDNA-specific SMEM bit positions)
|
||||
if 'SMEM' in formats and not is_cdna:
|
||||
formats['SMEM'] = [(n, 13 if n == 'DLC' else 14 if n == 'GLC' else h, 13 if n == 'DLC' else 14 if n == 'GLC' else l, e, t)
|
||||
for n, h, l, e, t in formats['SMEM']]
|
||||
# RDNA4: VFLAT/VGLOBAL/VSCRATCH OP field is [20:14] not [20:13] (PDF documentation error)
|
||||
for fmt_name in ['VFLAT', 'VGLOBAL', 'VSCRATCH']:
|
||||
if fmt_name in formats:
|
||||
formats[fmt_name] = [(n, h, 14 if n == 'OP' else l, e, t) for n, h, l, e, t in formats[fmt_name]]
|
||||
if doc_name in ('RDNA3', 'RDNA3.5'):
|
||||
if 'SOPPOp' in enums:
|
||||
for k, v in {8: 'S_WAITCNT_DEPCTR', 58: 'S_TTRACEDATA', 59: 'S_TTRACEDATA_IMM'}.items():
|
||||
assert k not in enums['SOPPOp']; enums['SOPPOp'][k] = v
|
||||
if 'SOPKOp' in enums:
|
||||
for k, v in {22: 'S_SUBVECTOR_LOOP_BEGIN', 23: 'S_SUBVECTOR_LOOP_END'}.items():
|
||||
assert k not in enums['SOPKOp']; enums['SOPKOp'][k] = v
|
||||
if 'SMEMOp' in enums:
|
||||
for k, v in {34: 'S_ATC_PROBE', 35: 'S_ATC_PROBE_BUFFER'}.items():
|
||||
assert k not in enums['SMEMOp']; enums['SMEMOp'][k] = v
|
||||
if 'DSOp' in enums:
|
||||
for k, v in {24: 'DS_GWS_SEMA_RELEASE_ALL', 25: 'DS_GWS_INIT', 26: 'DS_GWS_SEMA_V', 27: 'DS_GWS_SEMA_BR', 28: 'DS_GWS_SEMA_P', 29: 'DS_GWS_BARRIER'}.items():
|
||||
assert k not in enums['DSOp']; enums['DSOp'][k] = v
|
||||
if 'FLATOp' in enums:
|
||||
for k, v in {40: 'GLOBAL_LOAD_ADDTID_B32', 41: 'GLOBAL_STORE_ADDTID_B32', 55: 'FLAT_ATOMIC_CSUB_U32'}.items():
|
||||
assert k not in enums['FLATOp']; enums['FLATOp'][k] = v
|
||||
# CDNA MTBUF: PDF is missing the FORMAT field (bits[25:19]) which is required for tbuffer_* instructions
|
||||
if is_cdna and 'MTBUF' in formats:
|
||||
field_names = {f[0] for f in formats['MTBUF']}
|
||||
if 'FORMAT' not in field_names:
|
||||
formats['MTBUF'].append(('FORMAT', 25, 19, None, None))
|
||||
# CDNA SDWA/DPP: PDF only has modifier fields, need VOP1/VOP2 overlay for correct encoding
|
||||
if is_cdna:
|
||||
if 'SDWA' in formats:
|
||||
formats['SDWA'] = [('ENCODING', 8, 0, 0xf9, None), ('VOP_OP', 16, 9, None, None), ('VDST', 24, 17, None, 'VGPRField'), ('VOP2_OP', 31, 25, None, None)] + \
|
||||
[f for f in formats['SDWA'] if f[0] not in ('ENCODING', 'SDST', 'SD', 'ROW_MASK')]
|
||||
if 'DPP' in formats:
|
||||
formats['DPP'] = [('ENCODING', 8, 0, 0xfa, None), ('VOP_OP', 16, 9, None, None), ('VDST', 24, 17, None, 'VGPRField'), ('VOP2_OP', 31, 25, None, None),
|
||||
('SRC0', 39, 32, None, 'Src'), ('DPP_CTRL', 48, 40, None, None), ('BOUND_CTRL', 51, 51, None, None), ('SRC0_NEG', 52, 52, None, None), ('SRC0_ABS', 53, 53, None, None),
|
||||
('SRC1_NEG', 54, 54, None, None), ('SRC1_ABS', 55, 55, None, None), ('BANK_MASK', 59, 56, None, None), ('ROW_MASK', 63, 60, None, None)]
|
||||
|
||||
# Extract pseudocode for instructions
|
||||
all_text = '\n'.join(pdf.text(i) for i in range(instr_start, instr_end))
|
||||
matches = list(INST_PATTERN.finditer(all_text))
|
||||
raw_pseudocode: dict[tuple[str, int], str] = {}
|
||||
for i, match in enumerate(matches):
|
||||
name, opcode = match.group(1), int(match.group(2))
|
||||
start, end = match.end(), matches[i + 1].start() if i + 1 < len(matches) else match.end() + 2000
|
||||
snippet = all_text[start:end].strip()
|
||||
if pseudocode := _extract_pseudocode(snippet): raw_pseudocode[(name, opcode)] = pseudocode
|
||||
|
||||
# Extract unified buffer format table (RDNA only, for MTBUF format field)
|
||||
buf_fmt = {}
|
||||
if not is_cdna:
|
||||
for i in range(total_pages):
|
||||
for t in pdf.tables(i):
|
||||
if t and len(t) > 2 and t[0] and '#' in str(t[0][0]) and 'Format' in str(t[0]):
|
||||
for row in t[1:]:
|
||||
for j in range(0, len(row) - 1, 3): # table has 3-column groups: #, Format, (empty)
|
||||
if row[j] and row[j].isdigit() and row[j+1] and re.match(r'^[\d_]+_(UNORM|SNORM|USCALED|SSCALED|UINT|SINT|FLOAT)$', row[j+1]):
|
||||
buf_fmt[int(row[j])] = row[j+1]
|
||||
if buf_fmt: break
|
||||
if buf_fmt: break
|
||||
|
||||
return {"formats": formats, "enums": enums, "src_enum": src_enum, "doc_name": doc_name, "pseudocode": raw_pseudocode, "is_cdna": is_cdna, "buf_fmt": buf_fmt}
|
||||
|
||||
def _extract_pseudocode(text: str) -> str | None:
|
||||
"""Extract pseudocode from an instruction description snippet."""
|
||||
lines, result, depth, in_lambda = text.split('\n'), [], 0, 0
|
||||
for line in lines:
|
||||
s = line.strip()
|
||||
if not s or re.match(r'^\d+ of \d+$', s) or re.match(r'^\d+\.\d+\..*Instructions', s): continue
|
||||
if s.startswith(('Notes', 'Functional examples', '•', '-')): break # Stop at notes/bullets
|
||||
if s.startswith(('"RDNA', 'AMD ', 'CDNA')): continue
|
||||
if '•' in s or '–' in s: continue # Skip lines with bullets/dashes
|
||||
if '= lambda(' in s: in_lambda += 1; continue
|
||||
if in_lambda > 0:
|
||||
if s.endswith(');'): in_lambda -= 1
|
||||
continue
|
||||
if s.startswith('if '): depth += 1
|
||||
elif s.startswith('endif'): depth = max(0, depth - 1)
|
||||
if s.endswith('.') and not any(p in s for p in ['D0', 'D1', 'S0', 'S1', 'S2', 'SCC', 'VCC', 'tmp', '=']): continue
|
||||
if re.match(r'^[a-z].*\.$', s) and '=' not in s: continue
|
||||
is_code = (any(p in s for p in ['D0.', 'D1.', 'S0.', 'S1.', 'S2.', 'SCC =', 'SCC ?', 'VCC', 'EXEC', 'tmp =', 'tmp[', 'lane =', 'PC =',
|
||||
'D0[', 'D1[', 'S0[', 'S1[', 'S2[', 'MEM[', 'RETURN_DATA',
|
||||
'VADDR', 'VDATA', 'VDST', 'SADDR', 'OFFSET']) or
|
||||
s.startswith(('if ', 'else', 'elsif', 'endif', 'declare ', 'for ', 'endfor', '//')) or
|
||||
re.match(r'^[a-z_]+\s*=', s) or re.match(r'^[a-z_]+\[', s) or (depth > 0 and '=' in s))
|
||||
if is_code: result.append(s)
|
||||
return '\n'.join(result) if result else None
|
||||
|
||||
def _merge_results(results: list[dict]) -> dict:
  """Combine several per-PDF parse results into one superset result.

  Source-enum values, opcode-enum values and format field bit ranges must agree wherever two
  results define the same entry (checked with assert). For pseudocode and buffer formats the first
  result that defines a key wins. `is_cdna` is the OR of the inputs and `doc_names` collects every
  input's `doc_name` in order.
  """
  merged = {"formats": {}, "enums": {}, "src_enum": dict(SRC_EXTRAS), "doc_names": [], "pseudocode": {}, "is_cdna": False, "buf_fmt": {}}
  for res in results:
    merged["doc_names"].append(res["doc_name"])
    merged["is_cdna"] = merged["is_cdna"] or res["is_cdna"]

    src = merged["src_enum"]
    for val, name in res["src_enum"].items():
      if val not in src: src[val] = name
      else: assert src[val] == name

    for enum_name, ops in res["enums"].items():
      bucket = merged["enums"].setdefault(enum_name, {})
      for val, name in ops.items():
        if val not in bucket: bucket[val] = name
        else: assert bucket[val] == name

    for fmt_name, fields in res["formats"].items():
      if fmt_name not in merged["formats"]:
        merged["formats"][fmt_name] = list(fields)
        continue
      # snapshot of the already-known fields: name -> (hi, lo)
      known = {f[0]: (f[1], f[2]) for f in merged["formats"][fmt_name]}
      for f in fields:
        if f[0] not in known: merged["formats"][fmt_name].append(f)
        else: assert known[f[0]] == (f[1], f[2])

    for key, pc in res["pseudocode"].items():
      merged["pseudocode"].setdefault(key, pc)
    for val, name in res.get("buf_fmt", {}).items():
      merged["buf_fmt"].setdefault(val, name)
  return merged
|
||||
# Second pass: extract pseudocode between consecutive instructions
|
||||
pcode: dict[tuple[str, int], str] = {}
|
||||
for i, (page_idx, y, name, opcode) in enumerate(all_instructions):
|
||||
# Get end boundary from next instruction
|
||||
if i + 1 < len(all_instructions):
|
||||
next_page, next_y = all_instructions[i + 1][0], all_instructions[i + 1][1]
|
||||
else:
|
||||
next_page, next_y = page_idx, 0
|
||||
# Collect F6 text from current position to next instruction
|
||||
lines = []
|
||||
for p in range(page_idx, next_page + 1):
|
||||
start_y = y if p == page_idx else 800
|
||||
end_y = next_y if p == next_page else 0
|
||||
lines.extend((p, y2, t) for x, y2, t, f in pages[p] if f in ('/F6.0', '/F7.0') and end_y < y2 < start_y)
|
||||
if lines:
|
||||
# Sort by page first, then by y descending within each page (higher y = earlier text in PDF)
|
||||
pcode_lines = [t.replace('Ê', '').strip() for _, _, t in sorted(lines, key=lambda x: (x[0], -x[1]))]
|
||||
if pcode_lines: pcode[(name, opcode)] = '\n'.join(pcode_lines)
|
||||
return pcode
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# CODE GENERATION
|
||||
# Write autogen files
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def _generate_enum_py(enums, src_enum, doc_name, buf_fmt=None) -> str:
|
||||
"""Generate enum.py content (just enums, no dsl.py dependency)."""
|
||||
def enum_lines(name, items): return [f"class {name}(IntEnum):"] + [f" {n} = {v}" for v, n in sorted(items.items())] + [""]
|
||||
lines = [f"# autogenerated from AMD {doc_name} ISA PDF by pdf.py - do not edit", "from enum import IntEnum", ""]
|
||||
lines += enum_lines("SrcEnum", src_enum) + sum([enum_lines(n, ops) for n, ops in sorted(enums.items())], [])
|
||||
if buf_fmt: lines += enum_lines("BufFmt", {v: f"BUF_FMT_{n}" for v, n in buf_fmt.items() if 1 <= v <= 63})
|
||||
return '\n'.join(lines)
|
||||
|
||||
def _generate_ins_py(formats, enums, src_enum, doc_name) -> str:
|
||||
"""Generate ins.py content (instruction formats and helpers, imports dsl.py and enum.py)."""
|
||||
def field_key(f, order): return order.index(f[0].lower()) if f[0].lower() in order else 1000
|
||||
lines = [f"# autogenerated from AMD {doc_name} ISA PDF by pdf.py - do not edit",
|
||||
"# ruff: noqa: F401,F403", "from typing import Annotated",
|
||||
"from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, Inst96, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField",
|
||||
"from extra.assembly.amd.autogen.{arch}.enum import *",
|
||||
"import functools", ""]
|
||||
format_defaults = {'VOP3P': {'opsel_hi': 3, 'opsel_hi2': 1}}
|
||||
lines.append("# instruction formats")
|
||||
# MIMG has optional NSA (Non-Sequential Address) fields that extend beyond 64 bits, but base encoding is 64-bit
|
||||
inst64_override = {'MIMG'}
|
||||
for fmt_name, fields in sorted(formats.items()):
|
||||
max_bit = max(f[1] for f in fields)
|
||||
if fmt_name in inst64_override: base = "Inst64"
|
||||
else: base = "Inst96" if max_bit > 63 else "Inst64" if max_bit > 31 or fmt_name == 'VOP3SD' else "Inst32"
|
||||
order = FIELD_ORDER.get(fmt_name, [])
|
||||
lines.append(f"class {fmt_name}({base}):")
|
||||
if enc := next((f for f in fields if f[0] == 'ENCODING'), None):
|
||||
lines.append(f" encoding = bits[{enc[1]}:{enc[2]}] == 0b{enc[3]:b}" if enc[1] != enc[2] else f" encoding = bits[{enc[1]}] == {enc[3]}")
|
||||
if defaults := format_defaults.get(fmt_name): lines.append(f" _defaults = {defaults}")
|
||||
for name, hi, lo, _, ftype in sorted([f for f in fields if f[0] != 'ENCODING'], key=lambda f: field_key(f, order)):
|
||||
ann = f":Annotated[BitField, {ftype}]" if ftype and ftype.endswith('Op') else f":{ftype}" if ftype else ""
|
||||
lines.append(f" {name.lower()}{ann} = bits[{hi}]" if hi == lo else f" {name.lower()}{ann} = bits[{hi}:{lo}]")
|
||||
def write_enums(enums: dict[str, dict[int, str]], arch: str, path: str):
  """Write enum.py file from extracted enums.

  `enums` maps enum name -> {value: member}. Classes are written sorted by name; 'Src' becomes
  `SrcEnum`, 'BufFmt' keeps its name and gets a BUF_FMT_ member prefix, every other name gets an
  'Op' suffix. Members are written in ascending value order. `arch` is not used here.
  """
  out = ["# autogenerated from AMD ISA PDF by pdf.py - do not edit", "from enum import IntEnum", ""]
  for name, values in sorted(enums.items()):
    if name == 'Src': suffix = "Enum"
    elif name == 'BufFmt': suffix = ""
    else: suffix = "Op"
    prefix = "BUF_FMT_" if name == 'BufFmt' else ""
    out.append(f"class {name}{suffix}(IntEnum):")
    out.extend(f" {prefix}{member} = {val}" for val, member in sorted(values.items()))
    out.append("")
  content = "\n".join(out)
  with open(path, "w") as f:
    f.write(content)
|
||||
|
||||
def write_ins(formats: dict[str, list[tuple[str, int, int]]], encodings: dict[str, str], enums: dict[str, dict[int, str]], arch: str, path: str):
|
||||
"""Write ins.py file from extracted formats and enums."""
|
||||
# Field types and ordering
|
||||
def field_type(name, fmt):
  """Return the annotation string for field `name` of format `fmt`, or None if it gets none.

  The `op` field is annotated with `<fmt>Op` only when `fmt` has an entry in `enums`.
  Other fields are resolved first by exact name, then by prefix.
  """
  if name == 'op':
    return f'Annotated[BitField, {fmt}Op]' if fmt in enums else None

  # fields recognised by their exact name
  exact_names = (
    ('Annotated[BitField, VOPDOp]', ('opx', 'opy')),
    ('VDSTYEnc', ('vdsty',)),
    ('VGPRField', ('vdst', 'vsrc1', 'vaddr', 'vdata', 'data', 'data0', 'data1', 'addr', 'vsrc0', 'vsrc2', 'vsrc3')),
    ('SGPRField', ('sdst', 'sbase', 'sdata', 'srsrc', 'ssamp')),
    ('SSrc', ('saddr', 'soffset')),
    ('Src', ('src0', 'srcx0', 'srcy0')),
    ('Imm', ('offset',)),
  )
  by_name = {}
  for type_name, field_names in exact_names:
    by_name.update(dict.fromkeys(field_names, type_name))
  if name in by_name:
    return by_name[name]

  # fields recognised by prefix
  if name.startswith('ssrc'):
    return 'SSrc'
  if name.startswith('src') and name[3:].isdigit():
    return 'Src'
  if name.startswith('simm'):
    return 'SImm'
  if name.startswith('imm'):
    return 'Imm'
  return None
|
||||
# Preferred emission order of field names; names not listed here sort after all listed ones.
field_priority = [
  'encoding', 'op', 'opx', 'opy', 'vdst', 'vdstx', 'vdsty', 'sdst', 'vdata', 'sdata', 'addr', 'vaddr', 'data', 'data0', 'data1',
  'src0', 'srcx0', 'srcy0', 'vsrc0', 'ssrc0', 'src1', 'vsrc1', 'vsrcx1', 'vsrcy1', 'ssrc1', 'src2', 'vsrc2', 'src3', 'vsrc3',
  'saddr', 'sbase', 'srsrc', 'ssamp', 'soffset', 'offset', 'simm16', 'en', 'target', 'attr', 'attr_chan',
  'omod', 'neg', 'neg_hi', 'abs', 'clmp', 'opsel', 'opsel_hi', 'waitexp', 'wait_va',
  'dmask', 'dim', 'seg', 'format', 'offen', 'idxen', 'glc', 'dlc', 'slc', 'tfe', 'unrm', 'done', 'row',
]
def sort_fields(fields):
  """Return `fields` sorted by position in `field_priority`, then by the third tuple element."""
  rank = dict(zip(field_priority, range(len(field_priority))))
  def sort_key(field):
    # unlisted names share rank 1000, so they land at the end ordered by field[2]
    return rank.get(field[0], 1000), field[2]
  return sorted(fields, key=sort_key)
|
||||
|
||||
# Generate format classes
|
||||
lines = ["# autogenerated from AMD ISA PDF by pdf.py - do not edit", "# ruff: noqa: F401,F403",
|
||||
"from typing import Annotated",
|
||||
"from extra.assembly.amd.dsl import *",
|
||||
f"from extra.assembly.amd.autogen.{arch}.enum import *", "import functools", ""]
|
||||
for fmt_name, fields in sorted(formats.items()):
|
||||
lines.append(f"class {fmt_name}(Inst):")
|
||||
for name, hi, lo in sort_fields(fields):
|
||||
bits_str = f"bits[{hi}:{lo}]" if hi != lo else f"bits[{hi}]"
|
||||
if name == 'encoding' and fmt_name in encodings: lines.append(f" encoding = {bits_str} == 0b{encodings[fmt_name]}")
|
||||
else:
|
||||
ftype = field_type(name, fmt_name)
|
||||
lines.append(f" {name}{f':{ftype}' if ftype else ''} = {bits_str}")
|
||||
lines.append("")
|
||||
|
||||
# Generate instruction helpers
|
||||
lines.append("# instruction helpers")
|
||||
for cls_name, ops in sorted(enums.items()):
|
||||
fmt = cls_name[:-2]
|
||||
for op_val, name in sorted(ops.items()):
|
||||
seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=1"}.get(fmt, "")
|
||||
tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt, f"{fmt}, {cls_name}")
|
||||
if fmt in formats or fmt in ("GLOBAL", "SCRATCH"):
|
||||
suffix = "_e32" if fmt in ("VOP1", "VOP2", "VOPC") else "_e64" if fmt == "VOP3" and op_val < 512 else ""
|
||||
if name in ('V_FMAMK_F32', 'V_FMAMK_F16'):
|
||||
lines.append(f"def {name.lower()}{suffix}(vdst, src0, K, vsrc1): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)")
|
||||
elif name in ('V_FMAAK_F32', 'V_FMAAK_F16'):
|
||||
lines.append(f"def {name.lower()}{suffix}(vdst, src0, vsrc1, K): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)")
|
||||
else: lines.append(f"{name.lower()}{suffix} = functools.partial({tgt}.{name}{seg})")
|
||||
src_names = {name for _, name in src_enum.items()}
|
||||
lines += [""] + [f"{name} = SrcEnum.{name}" for _, name in sorted(src_enum.items()) if name not in {'DPP8', 'DPP16'}]
|
||||
if "NULL" in src_names: lines.append("OFF = NULL\n")
|
||||
return '\n'.join(lines)
|
||||
for fmt_name, ops in sorted(enums.items()):
|
||||
seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=1"}.get(fmt_name, "")
|
||||
tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt_name, f"{fmt_name}, {fmt_name}Op")
|
||||
suffix = "_e32" if fmt_name in ("VOP1", "VOP2", "VOPC") else "_e64" if fmt_name == "VOP3" and len(ops) > 0 else ""
|
||||
if fmt_name in formats or fmt_name in ("GLOBAL", "SCRATCH"):
|
||||
for op_val, name in sorted(ops.items()):
|
||||
fn_suffix = suffix if fmt_name != "VOP3" or op_val < 512 else ""
|
||||
lines.append(f"{name.lower()}{fn_suffix} = functools.partial({tgt}.{name}{seg})")
|
||||
|
||||
def _generate_str_pcode_py(enums, pseudocode, arch) -> str:
|
||||
"""Generate str_pcode.py content (raw pseudocode strings)."""
|
||||
# Get op enums for this arch (import from .ins which re-exports from .enum)
|
||||
import importlib
|
||||
autogen = importlib.import_module(f"extra.assembly.amd.autogen.{arch}.ins")
|
||||
OP_ENUMS = [getattr(autogen, name) for name in ['SOP1Op', 'SOP2Op', 'SOPCOp', 'SOPKOp', 'SOPPOp', 'SMEMOp', 'VOP1Op', 'VOP2Op', 'VOP3Op', 'VOP3SDOp', 'VOP3POp', 'VOPCOp', 'VOP3AOp', 'VOP3BOp', 'DSOp', 'FLATOp', 'GLOBALOp', 'SCRATCHOp'] if hasattr(autogen, name)]
|
||||
with open(path, "w") as f:
|
||||
f.write("\n".join(lines))
|
||||
|
||||
# Build defined ops mapping
|
||||
defined_ops: dict[tuple, list] = {}
|
||||
for enum_cls in OP_ENUMS:
|
||||
for op in enum_cls:
|
||||
if op.name.startswith(('S_', 'V_', 'DS_', 'FLAT_', 'GLOBAL_', 'SCRATCH_')): defined_ops.setdefault((op.name, op.value), []).append((enum_cls, op))
|
||||
|
||||
enum_names = [e.__name__ for e in OP_ENUMS]
|
||||
instructions: dict = {cls: {} for cls in OP_ENUMS}
|
||||
for key, pc in pseudocode.items():
|
||||
if key in defined_ops:
|
||||
for enum_cls, enum_val in defined_ops[key]: instructions[enum_cls][enum_val] = pc
|
||||
|
||||
# Build string dictionaries for each enum
|
||||
lines = [f'''# autogenerated by pdf.py - do not edit
|
||||
# to regenerate: python -m extra.assembly.amd.pdf --arch {arch}
|
||||
# ruff: noqa: E501
|
||||
from extra.assembly.amd.autogen.{arch}.enum import {", ".join(enum_names)}
|
||||
''']
|
||||
all_dict_entries: dict = {}
|
||||
for enum_cls in OP_ENUMS:
|
||||
cls_name = enum_cls.__name__
|
||||
if not instructions.get(enum_cls): continue
|
||||
dict_entries = [(op, repr(pc)) for op, pc in instructions[enum_cls].items()]
|
||||
if dict_entries:
|
||||
all_dict_entries[enum_cls] = dict_entries
|
||||
lines.append(f'{cls_name}_PCODE = {{')
|
||||
for op, escaped in dict_entries: lines.append(f" {cls_name}.{op.name}: {escaped},")
|
||||
lines.append('}\n')
|
||||
|
||||
lines.append('PSEUDOCODE_STRINGS = {')
|
||||
for enum_cls in OP_ENUMS:
|
||||
if all_dict_entries.get(enum_cls): lines.append(f' {enum_cls.__name__}: {enum_cls.__name__}_PCODE,')
|
||||
lines.append('}')
|
||||
return '\n'.join(lines)
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# MAIN GENERATION
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def generate_arch(arch: str) -> dict:
  """Parse the PDF(s) registered for `arch` and write its enum.py, ins.py and str_pcode.py.

  Returns the parsed result dict (the merged one when more than one PDF is registered).
  """
  sources = PDF_URLS[arch]
  if isinstance(sources, str): sources = [sources]

  print(f"\n{'='*60}\nGenerating {arch}...")
  print(f"Parsing {len(sources)} PDF(s)...")
  parsed = [_parse_single_pdf(src) for src in sources]
  if len(parsed) > 1:
    merged = _merge_results(parsed)
    doc_name = "+".join(merged["doc_names"])
  else:
    merged = parsed[0]
    doc_name = merged["doc_name"]

  out_dir = Path(f"extra/assembly/amd/autogen/{arch}")
  out_dir.mkdir(parents=True, exist_ok=True)
  (out_dir / "__init__.py").touch()

  # enum.py: enums only, no dsl.py dependency
  enum_path = out_dir / "enum.py"
  enum_path.write_text(_generate_enum_py(merged["enums"], merged["src_enum"], doc_name, merged.get("buf_fmt")))
  buf_fmt_count = sum(1 for v in merged.get("buf_fmt", {}) if 1 <= v <= 63)
  buf_fmt_note = f" + BufFmt ({buf_fmt_count})" if buf_fmt_count else ""
  print(f"Generated {enum_path}: SrcEnum ({len(merged['src_enum'])}) + {len(merged['enums'])} enums" + buf_fmt_note)

  # ins.py: instruction formats and helpers, imports dsl.py and enum.py
  ins_path = out_dir / "ins.py"
  ins_source = _generate_ins_py(merged["formats"], merged["enums"], merged["src_enum"], doc_name)
  ins_path.write_text(ins_source.replace("{arch}", arch))
  print(f"Generated {ins_path}: {len(merged['formats'])} formats")

  # str_pcode.py: generated last because it needs enum.py to exist first for imports
  pcode_path = out_dir / "str_pcode.py"
  pcode_path.write_text(_generate_str_pcode_py(merged["enums"], merged["pseudocode"], arch))
  print(f"Generated {pcode_path}: {len(merged['pseudocode'])} instructions")

  return merged
|
||||
|
||||
def _generate_arch_wrapper(arch: str):
  """Run generate_arch for `arch` and hand back the arch name instead of the result dict.

  Used as the worker function for multiprocessing.
  """
  generate_arch(arch)
  return arch
|
||||
|
||||
def generate_all():
  """Run generation for every architecture in PDF_URLS using a process pool."""
  with ProcessPoolExecutor() as pool:
    # Consume the whole result iterator so a failure in any worker is raised here.
    for _ in pool.map(_generate_arch_wrapper, PDF_URLS.keys()):
      pass
|
||||
def write_pcode(pcode: dict[tuple[str, int], str], enums: dict[str, dict[int, str]], arch: str, path: str):
  """Save the extracted pseudocode as one `<Enum>Op_PCODE` dict per enum, plus a lookup table.

  `pcode` is keyed by `(instruction_name, opcode)`; `enums` maps a format name to its
  `{opcode: instruction_name}` table. Only enums with at least one matching pseudocode
  entry are written. Enums appear in sorted name order, entries in ascending opcode
  order, and a final `PSEUDOCODE_STRINGS` dict maps each enum to its `_PCODE` dict.
  """
  # enum class name -> {opcode: (instruction name, pseudocode)}
  grouped: dict[str, dict[int, tuple[str, str]]] = {}
  for fmt_name, ops in enums.items():
    for opcode, name in ops.items():
      key = (name, opcode)
      if key not in pcode: continue
      grouped.setdefault(f"{fmt_name}Op", {})[opcode] = (name, pcode[key])

  enum_names = sorted(grouped)
  out = [
    "# autogenerated by pdf.py - do not edit",
    "# to regenerate: python -m extra.assembly.amd.pdf",
    "# ruff: noqa: E501",
    f"from extra.assembly.amd.autogen.{arch}.enum import {', '.join(enum_names)}",
    "",
  ]
  for enum_name in enum_names:
    entries = grouped[enum_name]
    out.append(f"{enum_name}_PCODE = {{")
    for opcode in sorted(entries):
      name, code = entries[opcode]
      out.append(f" {enum_name}.{name}: {code!r},")
    out.append("}\n")
  table = ", ".join(f"{e}: {e}_PCODE" for e in enum_names)
  out.append(f"PSEUDOCODE_STRINGS = {{{table}}}")

  with open(path, "w") as handle:
    handle.write("\n".join(out))
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="Generate AMD ISA autogen files from PDF documentation")
|
||||
parser.add_argument("--arch", choices=list(PDF_URLS.keys()) + ["all"], default="rdna3")
|
||||
args = parser.parse_args()
|
||||
if args.arch == "all": generate_all()
|
||||
else: generate_arch(args.arch)
|
||||
import pathlib
|
||||
for arch, url in PDF_URLS.items():
|
||||
print(f"Processing {arch}...")
|
||||
pages = extract(url)
|
||||
tables = extract_tables(pages)
|
||||
enums = extract_enums(tables)
|
||||
formats, encodings = extract_ins(tables)
|
||||
pcode = extract_pcode(pages, enums)
|
||||
# Fix known PDF errors
|
||||
if arch == 'rdna3':
|
||||
fixes = {'SOPP': {8: 'S_WAITCNT_DEPCTR', 58: 'S_TTRACEDATA', 59: 'S_TTRACEDATA_IMM'},
|
||||
'SOPK': {22: 'S_SUBVECTOR_LOOP_BEGIN', 23: 'S_SUBVECTOR_LOOP_END'},
|
||||
'SMEM': {34: 'S_ATC_PROBE', 35: 'S_ATC_PROBE_BUFFER'},
|
||||
'DS': {24: 'DS_GWS_SEMA_RELEASE_ALL', 25: 'DS_GWS_INIT', 26: 'DS_GWS_SEMA_V', 27: 'DS_GWS_SEMA_BR', 28: 'DS_GWS_SEMA_P', 29: 'DS_GWS_BARRIER'},
|
||||
'FLAT': {40: 'GLOBAL_LOAD_ADDTID_B32', 41: 'GLOBAL_STORE_ADDTID_B32', 55: 'FLAT_ATOMIC_CSUB_U32'}}
|
||||
for fmt, ops in fixes.items(): enums[fmt] = merge_dicts([enums[fmt], ops])
|
||||
if arch in ('rdna3', 'rdna4'):
|
||||
# RDNA SMEM: PDF says DLC=[14], GLC=[16] but hardware uses DLC=[13], GLC=[14]
|
||||
if 'SMEM' in formats:
|
||||
formats['SMEM'] = [(n, 13 if n == 'dlc' else 14 if n == 'glc' else h, 13 if n == 'dlc' else 14 if n == 'glc' else l)
|
||||
for n, h, l in formats['SMEM']]
|
||||
if arch == 'cdna':
|
||||
# CDNA DS: PDF is missing the GDS field (bit 16)
|
||||
if 'DS' in formats and not any(n == 'gds' for n, _, _ in formats['DS']):
|
||||
formats['DS'].append(('gds', 16, 16))
|
||||
# CDNA DPP/SDWA: PDF only documents modifier fields (bits[63:32]), need to add VOP overlay fields (bits[31:0])
|
||||
vop_overlay = [('encoding', 8, 0), ('vop_op', 16, 9), ('vdst', 24, 17), ('vop2_op', 31, 25)]
|
||||
if 'DPP' in formats and not any(n == 'encoding' for n, _, _ in formats['DPP']):
|
||||
formats['DPP'] = vop_overlay + [('bc' if n == 'bound_ctrl' else n, h, l) for n, h, l in formats['DPP']]
|
||||
encodings['DPP'] = '11111010'
|
||||
if 'SDWA' in formats and not any(n == 'encoding' for n, _, _ in formats['SDWA']):
|
||||
formats['SDWA'] = vop_overlay + [(n, h, l) for n, h, l in formats['SDWA']]
|
||||
encodings['SDWA'] = '11111001'
|
||||
base = pathlib.Path(__file__).parent / "autogen" / arch
|
||||
write_enums(enums, arch, base / "enum.py")
|
||||
write_ins(formats, encodings, enums, arch, base / "ins.py")
|
||||
write_pcode(pcode, enums, arch, base / "str_pcode.py")
|
||||
print(f" {len(tables)} tables, {len(pcode)} pcode -> {base}")
|
||||
|
||||
@@ -1615,7 +1615,7 @@ class TestCarryBorrow(unittest.TestCase):
|
||||
v_mov_b32_e32(v[2], s[2]),
|
||||
v_mov_b32_e32(v[3], s[3]),
|
||||
v_add_co_u32(v[4], VCC, v[0], v[2]),
|
||||
v_add_co_ci_u32_e32(v[5], VCC, v[1], v[3]),
|
||||
v_add_co_ci_u32_e32(v[5], v[1], v[3]),
|
||||
]
|
||||
st = run_program(instructions, n_lanes=1)
|
||||
self.assertEqual(st.vgpr[0][4], 0x00000000, "lo result")
|
||||
|
||||
@@ -271,7 +271,7 @@ class TestVOP3P(unittest.TestCase):
|
||||
s_mov_b32(s[1], 0x44004200), # hi=4.0, lo=3.0
|
||||
v_mov_b32_e32(v[0], s[0]),
|
||||
v_mov_b32_e32(v[1], s[1]),
|
||||
v_pk_add_f16(v[2], v[0], v[1]),
|
||||
v_pk_add_f16(v[2], v[0], v[1], opsel_hi=3, opsel_hi2=1),
|
||||
]
|
||||
st = run_program(instructions, n_lanes=1)
|
||||
result = st.vgpr[0][2]
|
||||
@@ -288,7 +288,7 @@ class TestVOP3P(unittest.TestCase):
|
||||
s_mov_b32(s[1], 0x45004400), # hi=5.0, lo=4.0
|
||||
v_mov_b32_e32(v[0], s[0]),
|
||||
v_mov_b32_e32(v[1], s[1]),
|
||||
v_pk_mul_f16(v[2], v[0], v[1]),
|
||||
v_pk_mul_f16(v[2], v[0], v[1], opsel_hi=3, opsel_hi2=1),
|
||||
]
|
||||
st = run_program(instructions, n_lanes=1)
|
||||
result = st.vgpr[0][2]
|
||||
@@ -307,7 +307,7 @@ class TestVOP3P(unittest.TestCase):
|
||||
v_mov_b32_e32(v[0], s[0]),
|
||||
v_mov_b32_e32(v[1], s[1]),
|
||||
v_mov_b32_e32(v[2], s[2]),
|
||||
v_pk_fma_f16(v[3], v[0], v[1], v[2]),
|
||||
v_pk_fma_f16(v[3], v[0], v[1], v[2], opsel_hi=3, opsel_hi2=1),
|
||||
]
|
||||
st = run_program(instructions, n_lanes=1)
|
||||
result = st.vgpr[0][3]
|
||||
@@ -325,7 +325,7 @@ class TestVOP3P(unittest.TestCase):
|
||||
instructions = [
|
||||
s_mov_b32(s[0], 0x3c003c00), # packed f16: hi=1.0, lo=1.0
|
||||
v_mov_b32_e32(v[0], s[0]),
|
||||
v_pk_add_f16(v[1], v[0], SrcEnum.POS_ONE), # Add inline constant 1.0
|
||||
v_pk_add_f16(v[1], v[0], SrcEnum.POS_ONE, opsel_hi=3, opsel_hi2=1), # Add inline constant 1.0
|
||||
]
|
||||
st = run_program(instructions, n_lanes=1)
|
||||
result = st.vgpr[0][1]
|
||||
@@ -345,7 +345,7 @@ class TestVOP3P(unittest.TestCase):
|
||||
instructions = [
|
||||
s_mov_b32(s[0], 0x44004200), # packed f16: hi=4.0, lo=3.0
|
||||
v_mov_b32_e32(v[0], s[0]),
|
||||
v_pk_mul_f16(v[1], v[0], SrcEnum.POS_TWO),
|
||||
v_pk_mul_f16(v[1], v[0], SrcEnum.POS_TWO, opsel_hi=3, opsel_hi2=1),
|
||||
]
|
||||
st = run_program(instructions, n_lanes=1)
|
||||
result = st.vgpr[0][1]
|
||||
@@ -486,12 +486,12 @@ class TestSpecialOps(unittest.TestCase):
|
||||
"""V_DOT2_F32_BF16 computes dot product of bf16 pairs."""
|
||||
# bf16 1.0 = 0x3f80, bf16 2.0 = 0x4000
|
||||
instructions = [
|
||||
s_mov_b32(s[0], 0x3f803f80), # packed bf16: 1.0, 1.0
|
||||
s_mov_b32(s[1], 0x40003f80), # packed bf16: 2.0, 1.0
|
||||
s_mov_b32(s[0], 0x3f803f80), # packed bf16: lo=1.0, hi=1.0
|
||||
s_mov_b32(s[1], 0x40003f80), # packed bf16: lo=1.0, hi=2.0
|
||||
v_mov_b32_e32(v[0], s[0]),
|
||||
v_mov_b32_e32(v[1], s[1]),
|
||||
v_mov_b32_e32(v[2], 0),
|
||||
v_dot2_f32_bf16(v[3], v[0], v[1], v[2]),
|
||||
v_dot2_f32_bf16(v[3], v[0], v[1], v[2], opsel_hi=3, opsel_hi2=1),
|
||||
]
|
||||
st = run_program(instructions, n_lanes=1)
|
||||
# 1.0*1.0 + 1.0*2.0 + 0 = 3.0
|
||||
@@ -510,7 +510,7 @@ class TestPackedMixedSigns(unittest.TestCase):
|
||||
s_mov_b32(s[1], 0x3c003c00), # packed: hi=1.0, lo=1.0
|
||||
v_mov_b32_e32(v[0], s[0]),
|
||||
v_mov_b32_e32(v[1], s[1]),
|
||||
v_pk_add_f16(v[2], v[0], v[1]),
|
||||
v_pk_add_f16(v[2], v[0], v[1], opsel_hi=3, opsel_hi2=1),
|
||||
]
|
||||
st = run_program(instructions, n_lanes=1)
|
||||
result = st.vgpr[0][2]
|
||||
|
||||
50
extra/assembly/amd/test/test_pdf.py
Normal file
50
extra/assembly/amd/test/test_pdf.py
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test pdf.py PDF parser and enum generation."""
|
||||
import unittest, tempfile, importlib.util
|
||||
from extra.assembly.amd.pdf import extract, extract_tables, extract_enums, write_enums, PDF_URLS
|
||||
|
||||
EXPECTED = {
|
||||
"rdna3": {"pages": 655, "tables": 115, "sop2_ops": 67, "sop2_first": "S_ADD_U32"},
|
||||
"rdna4": {"pages": 711, "tables": 125, "sop2_ops": 74, "sop2_first": "S_ADD_CO_U32"},
|
||||
"cdna": {"pages": 610, "tables": 104, "sop2_ops": 52, "sop2_first": "S_ADD_U32"},
|
||||
}
|
||||
|
||||
class TestPDF2(unittest.TestCase):
  """Checks on PDF extraction (pages, tables, enums) and on the module written by write_enums."""

  @classmethod
  def setUpClass(cls):
    """Extract pages, tables and enums once for every entry in PDF_URLS."""
    cls.data = {name: extract(url) for name, url in PDF_URLS.items()}
    cls.tables = {name: extract_tables(pages) for name, pages in cls.data.items()}
    cls.enums = {name: extract_enums(cls.tables[name]) for name in PDF_URLS}

  def test_page_counts(self):
    """Each architecture yields the expected number of extracted pages."""
    for name, exp in EXPECTED.items():
      self.assertEqual(len(self.data[name]), exp["pages"], f"{name} page count")

  def test_table_counts(self):
    """Each architecture yields the expected number of extracted tables."""
    for name, exp in EXPECTED.items():
      self.assertEqual(len(self.tables[name]), exp["tables"], f"{name} table count")

  def test_tables_sequential(self):
    """Table numbers run from 1 to the maximum with no gaps."""
    for name in PDF_URLS:
      nums = sorted(self.tables[name].keys())
      missing = set(range(1, max(nums) + 1)) - set(nums)
      self.assertEqual(missing, set(), f"{name} missing tables: {missing}")

  def test_generate_enums(self):
    """The file written by write_enums can be executed and has the expected SOP2Op contents."""
    import os  # function-scope import: this module's header does not import os
    for name, exp in EXPECTED.items():
      # The directory and everything in it is removed when the block exits, so the
      # generated file does not accumulate in the system temp directory across runs.
      with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "enum.py")
        write_enums(self.enums[name], name, path)
        spec = importlib.util.spec_from_file_location("enum", path)
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
      # Check SOP2Op
      self.assertTrue(hasattr(mod, 'SOP2Op'), f"{name} missing SOP2Op")
      self.assertEqual(len(mod.SOP2Op), exp["sop2_ops"], f"{name} SOP2Op count")
      self.assertEqual(mod.SOP2Op(0).name, exp["sop2_first"], f"{name} SOP2Op first")
      # Check all enums have at least 2 ops
      for attr in dir(mod):
        if attr.endswith('Op'):
          self.assertGreaterEqual(len(getattr(mod, attr)), 2, f"{name} {attr} has too few ops")
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,150 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test that PDF parser correctly extracts format fields."""
|
||||
import unittest, os
|
||||
from extra.assembly.amd.autogen.rdna3.ins import SOP1, SOP2, SOPK, SOPP, VOP1, VOP2, VOP3SD, VOPC, FLAT, VOPD, SOP1Op, SOP2Op, VOP1Op, VOP3Op
|
||||
|
||||
# expected formats with key fields and whether they have ENCODING
|
||||
EXPECTED_FORMATS = {
|
||||
'DPP16': (['SRC0', 'DPP_CTRL', 'BANK_MASK', 'ROW_MASK'], False),
|
||||
'DPP8': (['SRC0', 'LANE_SEL0', 'LANE_SEL7'], False),
|
||||
'DS': (['OP', 'ADDR', 'DATA0', 'DATA1', 'VDST'], True),
|
||||
'EXP': (['EN', 'TARGET', 'VSRC0', 'VSRC1', 'VSRC2', 'VSRC3'], True),
|
||||
'FLAT': (['OP', 'ADDR', 'DATA', 'SADDR', 'VDST', 'OFFSET'], True),
|
||||
'LDSDIR': (['VDST', 'OP'], True),
|
||||
'MIMG': (['OP', 'VADDR', 'VDATA', 'SRSRC', 'DMASK'], True),
|
||||
'MTBUF': (['OP', 'VADDR', 'VDATA', 'SRSRC', 'FORMAT', 'SOFFSET'], True),
|
||||
'MUBUF': (['OP', 'VADDR', 'VDATA', 'SRSRC', 'SOFFSET'], True),
|
||||
'SMEM': (['OP', 'SBASE', 'SDATA', 'OFFSET', 'SOFFSET'], True),
|
||||
'SOP1': (['OP', 'SDST', 'SSRC0'], True),
|
||||
'SOP2': (['OP', 'SDST', 'SSRC0', 'SSRC1'], True),
|
||||
'SOPC': (['OP', 'SSRC0', 'SSRC1'], True),
|
||||
'SOPK': (['OP', 'SDST', 'SIMM16'], True),
|
||||
'SOPP': (['OP', 'SIMM16'], True),
|
||||
'VINTERP': (['OP', 'VDST', 'SRC0', 'SRC1', 'SRC2'], True),
|
||||
'VOP1': (['OP', 'VDST', 'SRC0'], True),
|
||||
'VOP2': (['OP', 'VDST', 'SRC0', 'VSRC1'], True),
|
||||
'VOP3': (['OP', 'VDST', 'SRC0', 'SRC1', 'SRC2'], True),
|
||||
'VOP3P': (['OP', 'VDST', 'SRC0', 'SRC1', 'SRC2'], True),
|
||||
'VOP3SD': (['OP', 'VDST', 'SDST', 'SRC0', 'SRC1', 'SRC2'], True),
|
||||
'VOPC': (['OP', 'SRC0', 'VSRC1'], True),
|
||||
'VOPD': (['OPX', 'OPY', 'SRCX0', 'SRCY0', 'VDSTX', 'VDSTY'], True),
|
||||
}
|
||||
|
||||
# Skip PDF parsing tests by default - only run with TEST_PDF_PARSER=1
|
||||
# These are slow (~5s) and only needed when regenerating autogen/
|
||||
@unittest.skipUnless(os.environ.get("TEST_PDF_PARSER"), "set TEST_PDF_PARSER=1 to run PDF parser tests")
class TestPDFParserGenerate(unittest.TestCase):
  """Runs generate() and validates the parsed formats against EXPECTED_FORMATS."""

  def test_pdf_parser(self):
    """One test covering every check, so generate() is only run once."""
    from extra.assembly.amd.dsl import generate
    result = generate()

    def names_of(fmt):
      # set of field names parsed for `fmt` (empty when the format is absent)
      return {entry[0] for entry in result["formats"].get(fmt, [])}

    # every expected format is present
    for fmt_name in EXPECTED_FORMATS:
      self.assertIn(fmt_name, result["formats"], f"missing format {fmt_name}")

    # total number of formats
    self.assertEqual(len(result["formats"]), 23)

    # no format lists the same field twice
    for fmt_name, fields in result["formats"].items():
      field_names = [entry[0] for entry in fields]
      self.assertEqual(len(field_names), len(set(field_names)), f"{fmt_name} has duplicate fields: {field_names}")

    # key fields are present and ENCODING appears exactly where expected
    for fmt_name, (expected_fields, has_encoding) in EXPECTED_FORMATS.items():
      present = names_of(fmt_name)
      for field in expected_fields:
        self.assertIn(field, present, f"{fmt_name} missing {field}")
      if has_encoding:
        self.assertIn("ENCODING", present, f"{fmt_name} should have ENCODING")
      else:
        self.assertNotIn("ENCODING", present, f"{fmt_name} should not have ENCODING")

    # fields of one format must not show up in another format
    forbidden = (
      ("VOPD", ['DPP_CTRL', 'BANK_MASK', 'ROW_MASK']),
      ("DPP16", ['VDST', 'WAITEXP']),
      ("SOPP", ['SBASE', 'SDATA']),
    )
    for fmt_name, banned_fields in forbidden:
      present = names_of(fmt_name)
      for field in banned_fields:
        self.assertNotIn(field, present, f"{fmt_name} should not have {field}")
|
||||
|
||||
class TestPDFParser(unittest.TestCase):
  """Checks fields, bit ranges and encodings on the generated format classes."""

  def test_sop2_fields(self):
    """SOP2 has op, sdst, ssrc0 and ssrc1; op spans bits 29..23."""
    for expected in ('op', 'sdst', 'ssrc0', 'ssrc1'):
      self.assertIn(expected, SOP2._fields)
    op_field = SOP2._fields['op']
    self.assertEqual(op_field.hi, 29)
    self.assertEqual(op_field.lo, 23)

  def test_sop1_fields(self):
    """SOP1 has op, sdst, ssrc0 (bits 7..0), no simm16, and its encoding starts at bit 31."""
    for expected in ('op', 'sdst', 'ssrc0'):
      self.assertIn(expected, SOP1._fields)
    self.assertNotIn('simm16', SOP1._fields)
    ssrc0 = SOP1._fields['ssrc0']
    self.assertEqual(ssrc0.hi, 7)
    self.assertEqual(ssrc0.lo, 0)
    assert SOP1._encoding is not None
    self.assertEqual(SOP1._encoding[0].hi, 31)
    self.assertEqual(SOP1._encoding[1], 0b101111101)

  def test_vop3sd_fields(self):
    """VOP3SD has every field, including src0/src1/src2 from the page continuation, and is 8 bytes."""
    for expected in ('op', 'vdst', 'sdst', 'src0', 'src1', 'src2'):
      self.assertIn(expected, VOP3SD._fields)
    src0 = VOP3SD._fields['src0']
    self.assertEqual(src0.hi, 40)
    self.assertEqual(src0.lo, 32)
    self.assertEqual(VOP3SD._size(), 8)

  def test_flat_has_vdst(self):
    """FLAT has a vdst field at bits 63..56."""
    self.assertIn('vdst', FLAT._fields)
    vdst = FLAT._fields['vdst']
    self.assertEqual(vdst.hi, 63)
    self.assertEqual(vdst.lo, 56)

  def test_encoding_bits(self):
    """Encoding bit range and value are correct for the major formats."""
    # (format class, encoding hi bit, encoding lo bit, encoding value)
    expectations = (
      (SOP2, 31, 30, 0b10),
      (SOPK, 31, 28, 0b1011),
      (SOPP, 31, 23, 0b101111111),
      (VOP1, 31, 25, 0b0111111),
      (VOP2, 31, 31, 0b0),
      (VOPC, 31, 25, 0b0111110),
      (FLAT, 31, 26, 0b110111),
    )
    for fmt_cls, want_hi, want_lo, want_val in expectations:
      assert fmt_cls._encoding is not None
      self.assertEqual(fmt_cls._encoding[0].hi, want_hi, f"{fmt_cls.__name__} encoding hi")
      self.assertEqual(fmt_cls._encoding[0].lo, want_lo, f"{fmt_cls.__name__} encoding lo")
      self.assertEqual(fmt_cls._encoding[1], want_val, f"{fmt_cls.__name__} encoding val")

  def test_opcode_enums_exist(self):
    """Opcode enums are generated with more than the expected minimum number of members."""
    for op_enum, minimum in ((SOP1Op, 50), (SOP2Op, 50), (VOP1Op, 50), (VOP3Op, 200)):
      self.assertGreater(len(op_enum), minimum)

  def test_vopd_no_duplicate_fields(self):
    """VOPD has no duplicate fields, has its own fields, and has none of the DPP16 fields."""
    names = list(VOPD._fields.keys())
    self.assertEqual(len(names), len(set(names)))
    for expected in ('srcx0', 'srcy0', 'opx', 'opy'):
      self.assertIn(expected, VOPD._fields)
    for unexpected in ('dpp_ctrl', 'bank_mask', 'row_mask'):
      self.assertNotIn(unexpected, VOPD._fields)
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user