|
|
|
|
@@ -164,7 +164,7 @@ class VOP3(Inst):
|
|
|
|
|
|
|
|
|
|
class VOP3P(Inst):
|
|
|
|
|
encoding = FixedBitField(31, 23, 0b110100111)
|
|
|
|
|
op = EnumBitField(22, 16, VOP3POp, {VOP3POp.V_PK_MAD_I16, VOP3POp.V_PK_MUL_LO_U16, VOP3POp.V_PK_ADD_I16, VOP3POp.V_PK_SUB_I16, VOP3POp.V_PK_LSHLREV_B16, VOP3POp.V_PK_LSHRREV_B16, VOP3POp.V_PK_ASHRREV_I16, VOP3POp.V_PK_MAX_I16, VOP3POp.V_PK_MIN_I16, VOP3POp.V_PK_MAD_U16, VOP3POp.V_PK_ADD_U16, VOP3POp.V_PK_SUB_U16, VOP3POp.V_PK_MAX_U16, VOP3POp.V_PK_MIN_U16, VOP3POp.V_PK_FMA_F16, VOP3POp.V_PK_ADD_F16, VOP3POp.V_PK_MUL_F16, VOP3POp.V_PK_MIN_F16, VOP3POp.V_PK_MAX_F16, VOP3POp.V_DOT2_F32_BF16, VOP3POp.V_PK_MINIMUM3_F16, VOP3POp.V_PK_MAXIMUM3_F16, VOP3POp.V_MAD_MIX_F32, VOP3POp.V_MAD_MIXLO_F16, VOP3POp.V_MAD_MIXHI_F16, VOP3POp.V_DOT2_F32_F16, VOP3POp.V_DOT2_I32_I16, VOP3POp.V_DOT2_U32_U16, VOP3POp.V_DOT4_I32_I8, VOP3POp.V_DOT4_U32_U8, VOP3POp.V_DOT8_I32_I4, VOP3POp.V_DOT8_U32_U4, VOP3POp.V_MFMA_LD_SCALE_B32, VOP3POp.V_MFMA_F32_16X16X128_F8F6F4, VOP3POp.V_MFMA_F32_32X32X64_F8F6F4, VOP3POp.V_PK_FMA_F32, VOP3POp.V_PK_MUL_F32, VOP3POp.V_PK_ADD_F32, VOP3POp.V_PK_MOV_B32, VOP3POp.V_MFMA_F32_16X16X32_BF16, VOP3POp.V_MFMA_I32_16X16X64_I8, VOP3POp.V_MFMA_F32_32X32X16_BF16, VOP3POp.V_MFMA_I32_32X32X32_I8, VOP3POp.V_SMFMAC_F32_16X16X64_BF16, VOP3POp.V_SMFMAC_I32_16X16X128_I8, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8, VOP3POp.V_MFMA_F32_16X16X8_XF32, VOP3POp.V_MFMA_F32_32X32X4_XF32, VOP3POp.V_MFMA_F32_32X32X1_2B_F32, VOP3POp.V_MFMA_F32_16X16X1_4B_F32, VOP3POp.V_MFMA_F32_4X4X1_16B_F32, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_FP8, VOP3POp.V_MFMA_F32_32X32X2_F32, VOP3POp.V_MFMA_F32_16X16X4_F32, VOP3POp.V_SMFMAC_F32_32X32X32_BF16, VOP3POp.V_SMFMAC_I32_32X32X64_I8, VOP3POp.V_MFMA_F32_32X32X4_2B_F16, VOP3POp.V_MFMA_F32_16X16X4_4B_F16, VOP3POp.V_MFMA_F32_4X4X4_16B_F16, VOP3POp.V_SMFMAC_F32_32X32X64_BF8_BF8, VOP3POp.V_MFMA_F32_32X32X8_F16, VOP3POp.V_MFMA_F32_16X16X16_F16, VOP3POp.V_SMFMAC_F32_32X32X64_BF8_FP8, VOP3POp.V_SMFMAC_F32_32X32X64_FP8_BF8, VOP3POp.V_MFMA_I32_32X32X4_2B_I8, VOP3POp.V_MFMA_I32_16X16X4_4B_I8, VOP3POp.V_MFMA_I32_4X4X4_16B_I8, VOP3POp.V_SMFMAC_F32_32X32X64_FP8_FP8, VOP3POp.V_MFMA_F32_16X16X32_F16, VOP3POp.V_MFMA_F32_32X32X16_F16, VOP3POp.V_MFMA_I32_32X32X16_I8, VOP3POp.V_MFMA_I32_16X16X32_I8, VOP3POp.V_ACCVGPR_READ, VOP3POp.V_ACCVGPR_WRITE, VOP3POp.V_SMFMAC_F32_16X16X64_F16, VOP3POp.V_SMFMAC_F32_32X32X32_F16, VOP3POp.V_MFMA_F32_32X32X4_2B_BF16, VOP3POp.V_MFMA_F32_16X16X4_4B_BF16, VOP3POp.V_MFMA_F32_4X4X4_16B_BF16, VOP3POp.V_MFMA_F32_32X32X8_BF16, VOP3POp.V_MFMA_F32_16X16X16_BF16, VOP3POp.V_SMFMAC_F32_16X16X32_F16, VOP3POp.V_SMFMAC_F32_32X32X16_F16, VOP3POp.V_SMFMAC_F32_16X16X32_BF16, VOP3POp.V_SMFMAC_F32_32X32X16_BF16, VOP3POp.V_SMFMAC_I32_16X16X64_I8, VOP3POp.V_SMFMAC_I32_32X32X32_I8, VOP3POp.V_MFMA_F64_16X16X4_F64, VOP3POp.V_MFMA_F64_4X4X4_4B_F64, VOP3POp.V_MFMA_F32_16X16X32_BF8_BF8, VOP3POp.V_MFMA_F32_16X16X32_BF8_FP8, VOP3POp.V_MFMA_F32_16X16X32_FP8_BF8, VOP3POp.V_MFMA_F32_16X16X32_FP8_FP8, VOP3POp.V_MFMA_F32_32X32X16_BF8_BF8, VOP3POp.V_MFMA_F32_32X32X16_BF8_FP8, VOP3POp.V_MFMA_F32_32X32X16_FP8_BF8, VOP3POp.V_MFMA_F32_32X32X16_FP8_FP8, VOP3POp.V_SMFMAC_F32_16X16X64_BF8_BF8, VOP3POp.V_SMFMAC_F32_16X16X64_BF8_FP8, VOP3POp.V_SMFMAC_F32_16X16X64_FP8_BF8, VOP3POp.V_SMFMAC_F32_16X16X64_FP8_FP8, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_BF8, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_FP8, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_BF8, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_FP8})
|
|
|
|
|
op = EnumBitField(22, 16, VOP3POp, {VOP3POp.V_PK_MAD_I16, VOP3POp.V_PK_MUL_LO_U16, VOP3POp.V_PK_ADD_I16, VOP3POp.V_PK_SUB_I16, VOP3POp.V_PK_LSHLREV_B16, VOP3POp.V_PK_LSHRREV_B16, VOP3POp.V_PK_ASHRREV_I16, VOP3POp.V_PK_MAX_I16, VOP3POp.V_PK_MIN_I16, VOP3POp.V_PK_MAD_U16, VOP3POp.V_PK_ADD_U16, VOP3POp.V_PK_SUB_U16, VOP3POp.V_PK_MAX_U16, VOP3POp.V_PK_MIN_U16, VOP3POp.V_PK_FMA_F16, VOP3POp.V_PK_ADD_F16, VOP3POp.V_PK_MUL_F16, VOP3POp.V_PK_MIN_F16, VOP3POp.V_PK_MAX_F16, VOP3POp.V_DOT2_F32_BF16, VOP3POp.V_PK_MINIMUM3_F16, VOP3POp.V_PK_MAXIMUM3_F16, VOP3POp.V_MAD_MIX_F32, VOP3POp.V_MAD_MIXLO_F16, VOP3POp.V_MAD_MIXHI_F16, VOP3POp.V_DOT2_F32_F16, VOP3POp.V_DOT2_I32_I16, VOP3POp.V_DOT2_U32_U16, VOP3POp.V_DOT4_I32_I8, VOP3POp.V_DOT4_U32_U8, VOP3POp.V_DOT8_I32_I4, VOP3POp.V_DOT8_U32_U4, VOP3POp.V_MFMA_LD_SCALE_B32, VOP3POp.V_PK_FMA_F32, VOP3POp.V_PK_MUL_F32, VOP3POp.V_PK_ADD_F32, VOP3POp.V_PK_MOV_B32, VOP3POp.V_MFMA_F32_16X16X8_XF32, VOP3POp.V_MFMA_F32_32X32X4_XF32, VOP3POp.V_ACCVGPR_READ, VOP3POp.V_ACCVGPR_WRITE})
|
|
|
|
|
vdst = VGPRField(7, 0)
|
|
|
|
|
src0 = SrcField(40, 32)
|
|
|
|
|
src1 = SrcField(49, 41)
|
|
|
|
|
@@ -310,6 +310,7 @@ class VOP2_SDWA_SDST(VOP2):
|
|
|
|
|
s1 = BitField(63, 63)
|
|
|
|
|
|
|
|
|
|
class VOP3P_MFMA(VOP3P):
|
|
|
|
|
op = EnumBitField(22, 16, VOP3POp, {VOP3POp.V_MFMA_F32_16X16X128_F8F6F4, VOP3POp.V_MFMA_F32_32X32X64_F8F6F4, VOP3POp.V_MFMA_F32_16X16X32_BF16, VOP3POp.V_MFMA_I32_16X16X64_I8, VOP3POp.V_MFMA_F32_32X32X16_BF16, VOP3POp.V_MFMA_I32_32X32X32_I8, VOP3POp.V_SMFMAC_F32_16X16X64_BF16, VOP3POp.V_SMFMAC_I32_16X16X128_I8, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8, VOP3POp.V_MFMA_F32_32X32X1_2B_F32, VOP3POp.V_MFMA_F32_16X16X1_4B_F32, VOP3POp.V_MFMA_F32_4X4X1_16B_F32, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_FP8, VOP3POp.V_MFMA_F32_32X32X2_F32, VOP3POp.V_MFMA_F32_16X16X4_F32, VOP3POp.V_SMFMAC_F32_32X32X32_BF16, VOP3POp.V_SMFMAC_I32_32X32X64_I8, VOP3POp.V_MFMA_F32_32X32X4_2B_F16, VOP3POp.V_MFMA_F32_16X16X4_4B_F16, VOP3POp.V_MFMA_F32_4X4X4_16B_F16, VOP3POp.V_SMFMAC_F32_32X32X64_BF8_BF8, VOP3POp.V_MFMA_F32_32X32X8_F16, VOP3POp.V_MFMA_F32_16X16X16_F16, VOP3POp.V_SMFMAC_F32_32X32X64_BF8_FP8, VOP3POp.V_SMFMAC_F32_32X32X64_FP8_BF8, VOP3POp.V_MFMA_I32_32X32X4_2B_I8, VOP3POp.V_MFMA_I32_16X16X4_4B_I8, VOP3POp.V_MFMA_I32_4X4X4_16B_I8, VOP3POp.V_SMFMAC_F32_32X32X64_FP8_FP8, VOP3POp.V_MFMA_F32_16X16X32_F16, VOP3POp.V_MFMA_F32_32X32X16_F16, VOP3POp.V_MFMA_I32_32X32X16_I8, VOP3POp.V_MFMA_I32_16X16X32_I8, VOP3POp.V_SMFMAC_F32_16X16X64_F16, VOP3POp.V_SMFMAC_F32_32X32X32_F16, VOP3POp.V_MFMA_F32_32X32X4_2B_BF16, VOP3POp.V_MFMA_F32_16X16X4_4B_BF16, VOP3POp.V_MFMA_F32_4X4X4_16B_BF16, VOP3POp.V_MFMA_F32_32X32X8_BF16, VOP3POp.V_MFMA_F32_16X16X16_BF16, VOP3POp.V_SMFMAC_F32_16X16X32_F16, VOP3POp.V_SMFMAC_F32_32X32X16_F16, VOP3POp.V_SMFMAC_F32_16X16X32_BF16, VOP3POp.V_SMFMAC_F32_32X32X16_BF16, VOP3POp.V_SMFMAC_I32_16X16X64_I8, VOP3POp.V_SMFMAC_I32_32X32X32_I8, VOP3POp.V_MFMA_F64_16X16X4_F64, VOP3POp.V_MFMA_F64_4X4X4_4B_F64, VOP3POp.V_MFMA_F32_16X16X32_BF8_BF8, VOP3POp.V_MFMA_F32_16X16X32_BF8_FP8, VOP3POp.V_MFMA_F32_16X16X32_FP8_BF8, VOP3POp.V_MFMA_F32_16X16X32_FP8_FP8, VOP3POp.V_MFMA_F32_32X32X16_BF8_BF8, VOP3POp.V_MFMA_F32_32X32X16_BF8_FP8, VOP3POp.V_MFMA_F32_32X32X16_FP8_BF8, VOP3POp.V_MFMA_F32_32X32X16_FP8_FP8, VOP3POp.V_SMFMAC_F32_16X16X64_BF8_BF8, VOP3POp.V_SMFMAC_F32_16X16X64_BF8_FP8, VOP3POp.V_SMFMAC_F32_16X16X64_FP8_BF8, VOP3POp.V_SMFMAC_F32_16X16X64_FP8_FP8, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_BF8, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_FP8, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_BF8, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_FP8})
|
|
|
|
|
cbsz = BitField(10, 8)
|
|
|
|
|
abid = BitField(14, 11)
|
|
|
|
|
acc_cd = BitField(15, 15)
|
|
|
|
|
@@ -1648,80 +1649,80 @@ v_dot4_u32_u8 = functools.partial(VOP3P, VOP3POp.V_DOT4_U32_U8)
|
|
|
|
|
v_dot8_i32_i4 = functools.partial(VOP3P, VOP3POp.V_DOT8_I32_I4)
|
|
|
|
|
v_dot8_u32_u4 = functools.partial(VOP3P, VOP3POp.V_DOT8_U32_U4)
|
|
|
|
|
v_mfma_ld_scale_b32 = functools.partial(VOP3P, VOP3POp.V_MFMA_LD_SCALE_B32)
|
|
|
|
|
v_mfma_f32_16x16x128_f8f6f4 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X128_F8F6F4)
|
|
|
|
|
v_mfma_f32_32x32x64_f8f6f4 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X64_F8F6F4)
|
|
|
|
|
v_mfma_f32_16x16x128_f8f6f4 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X128_F8F6F4)
|
|
|
|
|
v_mfma_f32_32x32x64_f8f6f4 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X64_F8F6F4)
|
|
|
|
|
v_pk_fma_f32 = functools.partial(VOP3P, VOP3POp.V_PK_FMA_F32)
|
|
|
|
|
v_pk_mul_f32 = functools.partial(VOP3P, VOP3POp.V_PK_MUL_F32)
|
|
|
|
|
v_pk_add_f32 = functools.partial(VOP3P, VOP3POp.V_PK_ADD_F32)
|
|
|
|
|
v_pk_mov_b32 = functools.partial(VOP3P, VOP3POp.V_PK_MOV_B32)
|
|
|
|
|
v_mfma_f32_16x16x32_bf16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X32_BF16)
|
|
|
|
|
v_mfma_i32_16x16x64_i8 = functools.partial(VOP3P, VOP3POp.V_MFMA_I32_16X16X64_I8)
|
|
|
|
|
v_mfma_f32_32x32x16_bf16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X16_BF16)
|
|
|
|
|
v_mfma_i32_32x32x32_i8 = functools.partial(VOP3P, VOP3POp.V_MFMA_I32_32X32X32_I8)
|
|
|
|
|
v_smfmac_f32_16x16x64_bf16 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X64_BF16)
|
|
|
|
|
v_smfmac_i32_16x16x128_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_16X16X128_I8)
|
|
|
|
|
v_smfmac_f32_16x16x128_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8)
|
|
|
|
|
v_smfmac_f32_16x16x128_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8)
|
|
|
|
|
v_smfmac_f32_16x16x128_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8)
|
|
|
|
|
v_mfma_f32_16x16x32_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X32_BF16)
|
|
|
|
|
v_mfma_i32_16x16x64_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_I32_16X16X64_I8)
|
|
|
|
|
v_mfma_f32_32x32x16_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X16_BF16)
|
|
|
|
|
v_mfma_i32_32x32x32_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_I32_32X32X32_I8)
|
|
|
|
|
v_smfmac_f32_16x16x64_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X64_BF16)
|
|
|
|
|
v_smfmac_i32_16x16x128_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_I32_16X16X128_I8)
|
|
|
|
|
v_smfmac_f32_16x16x128_bf8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8)
|
|
|
|
|
v_smfmac_f32_16x16x128_bf8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8)
|
|
|
|
|
v_smfmac_f32_16x16x128_fp8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8)
|
|
|
|
|
v_mfma_f32_16x16x8_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X8_XF32)
|
|
|
|
|
v_mfma_f32_32x32x4_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X4_XF32)
|
|
|
|
|
v_mfma_f32_32x32x1_2b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X1_2B_F32)
|
|
|
|
|
v_mfma_f32_16x16x1_4b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X1_4B_F32)
|
|
|
|
|
v_mfma_f32_4x4x1_16b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_4X4X1_16B_F32)
|
|
|
|
|
v_smfmac_f32_16x16x128_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_FP8)
|
|
|
|
|
v_mfma_f32_32x32x2_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X2_F32)
|
|
|
|
|
v_mfma_f32_16x16x4_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X4_F32)
|
|
|
|
|
v_smfmac_f32_32x32x32_bf16 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_BF16)
|
|
|
|
|
v_smfmac_i32_32x32x64_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_32X32X64_I8)
|
|
|
|
|
v_mfma_f32_32x32x4_2b_f16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X4_2B_F16)
|
|
|
|
|
v_mfma_f32_16x16x4_4b_f16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X4_4B_F16)
|
|
|
|
|
v_mfma_f32_4x4x4_16b_f16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_4X4X4_16B_F16)
|
|
|
|
|
v_smfmac_f32_32x32x64_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X64_BF8_BF8)
|
|
|
|
|
v_mfma_f32_32x32x8_f16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X8_F16)
|
|
|
|
|
v_mfma_f32_16x16x16_f16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X16_F16)
|
|
|
|
|
v_smfmac_f32_32x32x64_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X64_BF8_FP8)
|
|
|
|
|
v_smfmac_f32_32x32x64_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X64_FP8_BF8)
|
|
|
|
|
v_mfma_i32_32x32x4_2b_i8 = functools.partial(VOP3P, VOP3POp.V_MFMA_I32_32X32X4_2B_I8)
|
|
|
|
|
v_mfma_i32_16x16x4_4b_i8 = functools.partial(VOP3P, VOP3POp.V_MFMA_I32_16X16X4_4B_I8)
|
|
|
|
|
v_mfma_i32_4x4x4_16b_i8 = functools.partial(VOP3P, VOP3POp.V_MFMA_I32_4X4X4_16B_I8)
|
|
|
|
|
v_smfmac_f32_32x32x64_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X64_FP8_FP8)
|
|
|
|
|
v_mfma_f32_16x16x32_f16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X32_F16)
|
|
|
|
|
v_mfma_f32_32x32x16_f16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X16_F16)
|
|
|
|
|
v_mfma_i32_32x32x16_i8 = functools.partial(VOP3P, VOP3POp.V_MFMA_I32_32X32X16_I8)
|
|
|
|
|
v_mfma_i32_16x16x32_i8 = functools.partial(VOP3P, VOP3POp.V_MFMA_I32_16X16X32_I8)
|
|
|
|
|
v_mfma_f32_32x32x1_2b_f32 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X1_2B_F32)
|
|
|
|
|
v_mfma_f32_16x16x1_4b_f32 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X1_4B_F32)
|
|
|
|
|
v_mfma_f32_4x4x1_16b_f32 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_4X4X1_16B_F32)
|
|
|
|
|
v_smfmac_f32_16x16x128_fp8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_FP8)
|
|
|
|
|
v_mfma_f32_32x32x2_f32 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X2_F32)
|
|
|
|
|
v_mfma_f32_16x16x4_f32 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X4_F32)
|
|
|
|
|
v_smfmac_f32_32x32x32_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X32_BF16)
|
|
|
|
|
v_smfmac_i32_32x32x64_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_I32_32X32X64_I8)
|
|
|
|
|
v_mfma_f32_32x32x4_2b_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X4_2B_F16)
|
|
|
|
|
v_mfma_f32_16x16x4_4b_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X4_4B_F16)
|
|
|
|
|
v_mfma_f32_4x4x4_16b_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_4X4X4_16B_F16)
|
|
|
|
|
v_smfmac_f32_32x32x64_bf8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X64_BF8_BF8)
|
|
|
|
|
v_mfma_f32_32x32x8_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X8_F16)
|
|
|
|
|
v_mfma_f32_16x16x16_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X16_F16)
|
|
|
|
|
v_smfmac_f32_32x32x64_bf8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X64_BF8_FP8)
|
|
|
|
|
v_smfmac_f32_32x32x64_fp8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X64_FP8_BF8)
|
|
|
|
|
v_mfma_i32_32x32x4_2b_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_I32_32X32X4_2B_I8)
|
|
|
|
|
v_mfma_i32_16x16x4_4b_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_I32_16X16X4_4B_I8)
|
|
|
|
|
v_mfma_i32_4x4x4_16b_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_I32_4X4X4_16B_I8)
|
|
|
|
|
v_smfmac_f32_32x32x64_fp8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X64_FP8_FP8)
|
|
|
|
|
v_mfma_f32_16x16x32_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X32_F16)
|
|
|
|
|
v_mfma_f32_32x32x16_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X16_F16)
|
|
|
|
|
v_mfma_i32_32x32x16_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_I32_32X32X16_I8)
|
|
|
|
|
v_mfma_i32_16x16x32_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_I32_16X16X32_I8)
|
|
|
|
|
v_accvgpr_read = functools.partial(VOP3P, VOP3POp.V_ACCVGPR_READ)
|
|
|
|
|
v_accvgpr_write = functools.partial(VOP3P, VOP3POp.V_ACCVGPR_WRITE)
|
|
|
|
|
v_smfmac_f32_16x16x64_f16 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X64_F16)
|
|
|
|
|
v_smfmac_f32_32x32x32_f16 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_F16)
|
|
|
|
|
v_mfma_f32_32x32x4_2b_bf16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X4_2B_BF16)
|
|
|
|
|
v_mfma_f32_16x16x4_4b_bf16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X4_4B_BF16)
|
|
|
|
|
v_mfma_f32_4x4x4_16b_bf16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_4X4X4_16B_BF16)
|
|
|
|
|
v_mfma_f32_32x32x8_bf16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X8_BF16)
|
|
|
|
|
v_mfma_f32_16x16x16_bf16 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X16_BF16)
|
|
|
|
|
v_smfmac_f32_16x16x32_f16 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X32_F16)
|
|
|
|
|
v_smfmac_f32_32x32x16_f16 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X16_F16)
|
|
|
|
|
v_smfmac_f32_16x16x32_bf16 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X32_BF16)
|
|
|
|
|
v_smfmac_f32_32x32x16_bf16 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X16_BF16)
|
|
|
|
|
v_smfmac_i32_16x16x64_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_16X16X64_I8)
|
|
|
|
|
v_smfmac_i32_32x32x32_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_32X32X32_I8)
|
|
|
|
|
v_mfma_f64_16x16x4_f64 = functools.partial(VOP3P, VOP3POp.V_MFMA_F64_16X16X4_F64)
|
|
|
|
|
v_mfma_f64_4x4x4_4b_f64 = functools.partial(VOP3P, VOP3POp.V_MFMA_F64_4X4X4_4B_F64)
|
|
|
|
|
v_mfma_f32_16x16x32_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X32_BF8_BF8)
|
|
|
|
|
v_mfma_f32_16x16x32_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X32_BF8_FP8)
|
|
|
|
|
v_mfma_f32_16x16x32_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X32_FP8_BF8)
|
|
|
|
|
v_mfma_f32_16x16x32_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X32_FP8_FP8)
|
|
|
|
|
v_mfma_f32_32x32x16_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X16_BF8_BF8)
|
|
|
|
|
v_mfma_f32_32x32x16_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X16_BF8_FP8)
|
|
|
|
|
v_mfma_f32_32x32x16_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X16_FP8_BF8)
|
|
|
|
|
v_mfma_f32_32x32x16_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X16_FP8_FP8)
|
|
|
|
|
v_smfmac_f32_16x16x64_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X64_BF8_BF8)
|
|
|
|
|
v_smfmac_f32_16x16x64_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X64_BF8_FP8)
|
|
|
|
|
v_smfmac_f32_16x16x64_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X64_FP8_BF8)
|
|
|
|
|
v_smfmac_f32_16x16x64_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X64_FP8_FP8)
|
|
|
|
|
v_smfmac_f32_32x32x32_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_BF8)
|
|
|
|
|
v_smfmac_f32_32x32x32_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_FP8)
|
|
|
|
|
v_smfmac_f32_32x32x32_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_BF8)
|
|
|
|
|
v_smfmac_f32_32x32x32_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_FP8)
|
|
|
|
|
v_smfmac_f32_16x16x64_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X64_F16)
|
|
|
|
|
v_smfmac_f32_32x32x32_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X32_F16)
|
|
|
|
|
v_mfma_f32_32x32x4_2b_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X4_2B_BF16)
|
|
|
|
|
v_mfma_f32_16x16x4_4b_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X4_4B_BF16)
|
|
|
|
|
v_mfma_f32_4x4x4_16b_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_4X4X4_16B_BF16)
|
|
|
|
|
v_mfma_f32_32x32x8_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X8_BF16)
|
|
|
|
|
v_mfma_f32_16x16x16_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X16_BF16)
|
|
|
|
|
v_smfmac_f32_16x16x32_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X32_F16)
|
|
|
|
|
v_smfmac_f32_32x32x16_f16 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X16_F16)
|
|
|
|
|
v_smfmac_f32_16x16x32_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X32_BF16)
|
|
|
|
|
v_smfmac_f32_32x32x16_bf16 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X16_BF16)
|
|
|
|
|
v_smfmac_i32_16x16x64_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_I32_16X16X64_I8)
|
|
|
|
|
v_smfmac_i32_32x32x32_i8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_I32_32X32X32_I8)
|
|
|
|
|
v_mfma_f64_16x16x4_f64 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F64_16X16X4_F64)
|
|
|
|
|
v_mfma_f64_4x4x4_4b_f64 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F64_4X4X4_4B_F64)
|
|
|
|
|
v_mfma_f32_16x16x32_bf8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X32_BF8_BF8)
|
|
|
|
|
v_mfma_f32_16x16x32_bf8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X32_BF8_FP8)
|
|
|
|
|
v_mfma_f32_16x16x32_fp8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X32_FP8_BF8)
|
|
|
|
|
v_mfma_f32_16x16x32_fp8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_16X16X32_FP8_FP8)
|
|
|
|
|
v_mfma_f32_32x32x16_bf8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X16_BF8_BF8)
|
|
|
|
|
v_mfma_f32_32x32x16_bf8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X16_BF8_FP8)
|
|
|
|
|
v_mfma_f32_32x32x16_fp8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X16_FP8_BF8)
|
|
|
|
|
v_mfma_f32_32x32x16_fp8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_MFMA_F32_32X32X16_FP8_FP8)
|
|
|
|
|
v_smfmac_f32_16x16x64_bf8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X64_BF8_BF8)
|
|
|
|
|
v_smfmac_f32_16x16x64_bf8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X64_BF8_FP8)
|
|
|
|
|
v_smfmac_f32_16x16x64_fp8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X64_FP8_BF8)
|
|
|
|
|
v_smfmac_f32_16x16x64_fp8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_16X16X64_FP8_FP8)
|
|
|
|
|
v_smfmac_f32_32x32x32_bf8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_BF8)
|
|
|
|
|
v_smfmac_f32_32x32x32_bf8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X32_BF8_FP8)
|
|
|
|
|
v_smfmac_f32_32x32x32_fp8_bf8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_BF8)
|
|
|
|
|
v_smfmac_f32_32x32x32_fp8_fp8 = functools.partial(VOP3P_MFMA, VOP3POp.V_SMFMAC_F32_32X32X32_FP8_FP8)
|
|
|
|
|
v_mfma_scale_f32_16x16x128_f8f6f4 = functools.partial(VOP3PX2, VOP3PX2Op.V_MFMA_SCALE_F32_16X16X128_F8F6F4)
|
|
|
|
|
v_mfma_scale_f32_32x32x64_f8f6f4 = functools.partial(VOP3PX2, VOP3PX2Op.V_MFMA_SCALE_F32_32X32X64_F8F6F4)
|
|
|
|
|
v_add_co_u32 = functools.partial(VOP3SD, VOP3SDOp.V_ADD_CO_U32)
|
|
|
|
|
|