# autogenerated by pdf.py - do not edit
|
|
# to regenerate: python -m extra.assembly.amd.pdf --arch cdna
|
|
# ruff: noqa: E501
|
|
# mypy: ignore-errors
|
|
from extra.assembly.amd.autogen.cdna.enum import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3POp, VOPCOp, VOP3AOp, VOP3BOp, DSOp, FLATOp, GLOBALOp, SCRATCHOp
|
|
from extra.assembly.amd.pcode import ABSDIFF, BYTE_PERMUTE, DENORM, F, INF, OVERFLOW_F32, OVERFLOW_F64, PI, ROUND_MODE, Reg, SAT8, TWO_OVER_PI_1201, UNDERFLOW_F32, UNDERFLOW_F64, WAVE_MODE, _pack, _pack32, bf16_to_f32, cos, cvtToQuietNAN, exponent, f16_to_f32, f16_to_i16, f16_to_snorm, f16_to_u16, f16_to_unorm, f32_to_bf16, f32_to_f16, f32_to_f64, f32_to_i32, f32_to_snorm, f32_to_u32, f32_to_u8, f32_to_unorm, f64_to_f32, f64_to_i32, f64_to_u32, floor, fma, fract, i16_to_f16, i32_to_f32, i32_to_f64, i32_to_i16, isEven, isNAN, isQuietNAN, isSignalNAN, ldexp, log2, mantissa, pow, s_ff1_i32_b64, sign, signext, signext_from_bit, sin, sqrt, trunc, u16_to_f16, u32_to_f32, u32_to_f64, u32_to_u16, u4_to_u32, u8_to_u32, v_max3_f16, v_max3_f32, v_max3_i16, v_max3_i32, v_max3_u16, v_max3_u32, v_max_f16, v_max_f32, v_max_i16, v_max_i32, v_max_u16, v_max_u32, v_min3_f16, v_min3_f32, v_min_f16, v_min_f32, v_min_i16, v_min_i32, v_min_u16, v_min_u32, v_msad_u8, v_sad_u8
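# Handler convention (documentation note): every `_<Group>Op_<NAME>` function below takes its
# operands and machine state (S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, plus optional
# src0_idx/vdst_idx/PC) and returns a dict containing only the state it wrote, e.g. {'D0': D0, 'SCC': SCC}.
# Operands are Reg values supporting typed views (.u32, .i64, .f32, ...) and bit slicing (S0.u32[31], S0[15 : 0]).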
|
|
|
|
def _SOP1Op_S_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.b32 = S0.b32
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.b64 = S0.b64
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_CMOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if SCC:
|
|
D0.b32 = S0.b32
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_CMOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if SCC:
|
|
D0.b64 = S0.b64
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ~S0.u32
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_NOT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = ~S0.u64
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_WQM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
for i in range(0, int(31)+1):
|
|
tmp[i] = S0.u32[(i & 60) + (4) - 1 : (i & 60)] != 0
|
|
D0.u32 = tmp
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_WQM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
for i in range(0, int(63)+1):
|
|
tmp[i] = S0.u64[(i & 60) + (4) - 1 : (i & 60)] != 0
|
|
D0.u64 = tmp
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_BREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32[31 : 0] = S0.u32[0 : 31]
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_BREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[63 : 0] = S0.u64[0 : 63]
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_BCNT0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
for i in range(0, int(31)+1):
|
|
tmp += ((1) if (S0.u32[i] == 0) else (0))
|
|
D0.i32 = tmp
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_BCNT0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
for i in range(0, int(63)+1):
|
|
tmp += ((1) if (S0.u64[i] == 0) else (0))
|
|
D0.i32 = tmp
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_BCNT1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
for i in range(0, int(31)+1):
|
|
tmp += ((1) if (S0.u32[i] == 1) else (0))
|
|
D0.i32 = tmp
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_BCNT1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
for i in range(0, int(63)+1):
|
|
tmp += ((1) if (S0.u64[i] == 1) else (0))
|
|
D0.i32 = tmp
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_FF0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(-1)
|
|
for i in range(0, int(31)+1):
|
|
if S0.u32[i] == 0:
|
|
tmp = Reg(i); break
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_FF0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(-1)
|
|
for i in range(0, int(63)+1):
|
|
if S0.u64[i] == 0:
|
|
tmp = Reg(i); break
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_FF1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(-1)
|
|
for i in range(0, int(31)+1):
|
|
if S0.u32[i] == 1:
|
|
tmp = Reg(i); break
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_FF1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(-1)
|
|
for i in range(0, int(63)+1):
|
|
if S0.u64[i] == 1:
|
|
tmp = Reg(i); break
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_FLBIT_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(-1)
|
|
for i in range(0, int(31)+1):
|
|
if S0.u32[31 - i] == 1:
|
|
tmp = Reg(i); break
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_FLBIT_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(-1)
|
|
for i in range(0, int(63)+1):
|
|
if S0.u64[63 - i] == 1:
|
|
tmp = Reg(i); break
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_FLBIT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(-1)
|
|
for i in range(1, int(31)+1):
|
|
if S0.u32[31 - i] != S0.u32[31]:
|
|
tmp = Reg(i); break
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_FLBIT_I32_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(-1)
|
|
for i in range(1, int(63)+1):
|
|
if S0.u64[63 - i] != S0.u64[63]:
|
|
tmp = Reg(i); break
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_SEXT_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (signext(S0.i8))
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_SEXT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (signext(S0.i16))
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_BITSET0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32[S0.u32[4 : 0]] = 0
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_BITSET0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[S0.u32[5 : 0]] = 0
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_BITSET1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32[S0.u32[4 : 0]] = 1
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_BITSET1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[S0.u32[5 : 0]] = 1
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_GETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i64 = PC + 4
|
|
return {'D0': D0}
|
|
|
|
def _SOP1Op_S_SETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
PC = Reg(S0.i64)
|
|
return {'PC': PC}
|
|
|
|
def _SOP1Op_S_SWAPPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
jump_addr = Reg(S0.i64)
|
|
D0.i64 = PC + 4
|
|
PC = Reg(jump_addr.i64)
|
|
return {'D0': D0, 'PC': PC}
|
|
|
|
def _SOP1Op_S_RFE_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
PC = Reg(S0.i64)
|
|
return {'PC': PC}
|
|
|
|
def _SOP1Op_S_AND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = (S0.u64 & EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_OR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = (S0.u64 | EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = (S0.u64 ^ EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_ANDN2_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = (S0.u64 & ~EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_ORN2_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = (S0.u64 | ~EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = ~(S0.u64 & EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = ~(S0.u64 | EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = ~(S0.u64 ^ EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_QUADMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
for i in range(0, int(7)+1):
|
|
tmp[i] = S0.u32[(i * 4) + (4) - 1 : (i * 4)] != 0
|
|
D0.u32 = tmp
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_QUADMASK_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
for i in range(0, int(15)+1):
|
|
tmp[i] = S0.u64[(i * 4) + (4) - 1 : (i * 4)] != 0
|
|
D0.u64 = tmp
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_ABS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32))
|
|
SCC = Reg(D0.i32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP1Op_S_SET_GPR_IDX_IDX(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
M0[7 : 0] = S0.u32[7 : 0].b8  # NOTE: M0 is not a parameter of this handler and is not returned, so the index write is not visible to the caller
|
|
return {}
|
|
|
|
def _SOP1Op_S_ANDN1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = (~S0.u64 & EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_ORN1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
saveexec = Reg(EXEC.u64)
|
|
EXEC.u64 = (~S0.u64 | EXEC.u64)
|
|
D0.u64 = saveexec.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_ANDN1_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64 = (~S0.u64 & EXEC.u64)
|
|
D0.u64 = EXEC.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_ANDN2_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64 = (S0.u64 & ~EXEC.u64)
|
|
D0.u64 = EXEC.u64
|
|
SCC = Reg(EXEC.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC}
|
|
|
|
def _SOP1Op_S_BITREPLICATE_B64_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.u32)
|
|
for i in range(0, int(31)+1):
|
|
D0.u64[i * 2] = tmp[i]
|
|
D0.u64[i * 2 + 1] = tmp[i]
|
|
return {'D0': D0}
|
|
|
|
SOP1Op_FUNCTIONS = {
|
|
SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32,
|
|
SOP1Op.S_MOV_B64: _SOP1Op_S_MOV_B64,
|
|
SOP1Op.S_CMOV_B32: _SOP1Op_S_CMOV_B32,
|
|
SOP1Op.S_CMOV_B64: _SOP1Op_S_CMOV_B64,
|
|
SOP1Op.S_NOT_B32: _SOP1Op_S_NOT_B32,
|
|
SOP1Op.S_NOT_B64: _SOP1Op_S_NOT_B64,
|
|
SOP1Op.S_WQM_B32: _SOP1Op_S_WQM_B32,
|
|
SOP1Op.S_WQM_B64: _SOP1Op_S_WQM_B64,
|
|
SOP1Op.S_BREV_B32: _SOP1Op_S_BREV_B32,
|
|
SOP1Op.S_BREV_B64: _SOP1Op_S_BREV_B64,
|
|
SOP1Op.S_BCNT0_I32_B32: _SOP1Op_S_BCNT0_I32_B32,
|
|
SOP1Op.S_BCNT0_I32_B64: _SOP1Op_S_BCNT0_I32_B64,
|
|
SOP1Op.S_BCNT1_I32_B32: _SOP1Op_S_BCNT1_I32_B32,
|
|
SOP1Op.S_BCNT1_I32_B64: _SOP1Op_S_BCNT1_I32_B64,
|
|
SOP1Op.S_FF0_I32_B32: _SOP1Op_S_FF0_I32_B32,
|
|
SOP1Op.S_FF0_I32_B64: _SOP1Op_S_FF0_I32_B64,
|
|
SOP1Op.S_FF1_I32_B32: _SOP1Op_S_FF1_I32_B32,
|
|
SOP1Op.S_FF1_I32_B64: _SOP1Op_S_FF1_I32_B64,
|
|
SOP1Op.S_FLBIT_I32_B32: _SOP1Op_S_FLBIT_I32_B32,
|
|
SOP1Op.S_FLBIT_I32_B64: _SOP1Op_S_FLBIT_I32_B64,
|
|
SOP1Op.S_FLBIT_I32: _SOP1Op_S_FLBIT_I32,
|
|
SOP1Op.S_FLBIT_I32_I64: _SOP1Op_S_FLBIT_I32_I64,
|
|
SOP1Op.S_SEXT_I32_I8: _SOP1Op_S_SEXT_I32_I8,
|
|
SOP1Op.S_SEXT_I32_I16: _SOP1Op_S_SEXT_I32_I16,
|
|
SOP1Op.S_BITSET0_B32: _SOP1Op_S_BITSET0_B32,
|
|
SOP1Op.S_BITSET0_B64: _SOP1Op_S_BITSET0_B64,
|
|
SOP1Op.S_BITSET1_B32: _SOP1Op_S_BITSET1_B32,
|
|
SOP1Op.S_BITSET1_B64: _SOP1Op_S_BITSET1_B64,
|
|
SOP1Op.S_GETPC_B64: _SOP1Op_S_GETPC_B64,
|
|
SOP1Op.S_SETPC_B64: _SOP1Op_S_SETPC_B64,
|
|
SOP1Op.S_SWAPPC_B64: _SOP1Op_S_SWAPPC_B64,
|
|
SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64,
|
|
SOP1Op.S_AND_SAVEEXEC_B64: _SOP1Op_S_AND_SAVEEXEC_B64,
|
|
SOP1Op.S_OR_SAVEEXEC_B64: _SOP1Op_S_OR_SAVEEXEC_B64,
|
|
SOP1Op.S_XOR_SAVEEXEC_B64: _SOP1Op_S_XOR_SAVEEXEC_B64,
|
|
SOP1Op.S_ANDN2_SAVEEXEC_B64: _SOP1Op_S_ANDN2_SAVEEXEC_B64,
|
|
SOP1Op.S_ORN2_SAVEEXEC_B64: _SOP1Op_S_ORN2_SAVEEXEC_B64,
|
|
SOP1Op.S_NAND_SAVEEXEC_B64: _SOP1Op_S_NAND_SAVEEXEC_B64,
|
|
SOP1Op.S_NOR_SAVEEXEC_B64: _SOP1Op_S_NOR_SAVEEXEC_B64,
|
|
SOP1Op.S_XNOR_SAVEEXEC_B64: _SOP1Op_S_XNOR_SAVEEXEC_B64,
|
|
SOP1Op.S_QUADMASK_B32: _SOP1Op_S_QUADMASK_B32,
|
|
SOP1Op.S_QUADMASK_B64: _SOP1Op_S_QUADMASK_B64,
|
|
SOP1Op.S_ABS_I32: _SOP1Op_S_ABS_I32,
|
|
SOP1Op.S_SET_GPR_IDX_IDX: _SOP1Op_S_SET_GPR_IDX_IDX,
|
|
SOP1Op.S_ANDN1_SAVEEXEC_B64: _SOP1Op_S_ANDN1_SAVEEXEC_B64,
|
|
SOP1Op.S_ORN1_SAVEEXEC_B64: _SOP1Op_S_ORN1_SAVEEXEC_B64,
|
|
SOP1Op.S_ANDN1_WREXEC_B64: _SOP1Op_S_ANDN1_WREXEC_B64,
|
|
SOP1Op.S_ANDN2_WREXEC_B64: _SOP1Op_S_ANDN2_WREXEC_B64,
|
|
SOP1Op.S_BITREPLICATE_B64_B32: _SOP1Op_S_BITREPLICATE_B64_B32,
|
|
}
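# Illustrative sketch only (not generated output): one way to drive the table above. The wrapper name
# `_run_sop1_example` and the zero-filled unused operand slots are assumptions; only SOP1Op,
# SOP1Op_FUNCTIONS and Reg come from this module and its imports.
def _run_sop1_example(op, s0=0, d0=0, scc=0, exec_mask=1):
  S0, D0, SCC, EXEC = Reg(s0), Reg(d0), Reg(scc), Reg(exec_mask)
  # handlers ignore operand slots they do not use, so S1/S2/VCC are passed as zero Regs here
  return SOP1Op_FUNCTIONS[op](S0, Reg(0), Reg(0), D0, SCC, Reg(0), 0, EXEC, 0, None)
# e.g. _run_sop1_example(SOP1Op.S_NOT_B32, s0=0x0000FFFF) should return D0 holding 0xFFFF0000 with SCC = 1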
|
|
|
|
def _SOP2Op_S_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg((S0.u32) + (S1.u32))
|
|
SCC = Reg(((1) if (tmp >= 0x100000000) else (0)))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.u32 - S1.u32)
|
|
SCC = Reg(((1) if (S1.u32 > S0.u32) else (0)))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ADD_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.i32 + S1.i32)
|
|
SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31])))
|
|
D0.i32 = tmp.i32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_SUB_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.i32 - S1.i32)
|
|
SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31])))
|
|
D0.i32 = tmp.i32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ADDC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg((S0.u32) + (S1.u32) + SCC.u64)
|
|
SCC = Reg(((1) if (tmp >= 0x100000000) else (0)))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_SUBB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.u32 - S1.u32 - SCC.u32)
|
|
SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0)))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 < S1.i32)
|
|
D0.i32 = ((S0.i32) if (SCC) else (S1.i32))
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 < S1.u32)
|
|
D0.u32 = ((S0.u32) if (SCC) else (S1.u32))
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 >= S1.i32)
|
|
D0.i32 = ((S0.i32) if (SCC) else (S1.i32))
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 >= S1.u32)
|
|
D0.u32 = ((S0.u32) if (SCC) else (S1.u32))
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_CSELECT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((S0.u32) if (SCC) else (S1.u32))
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_CSELECT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = ((S0.u64) if (SCC) else (S1.u64))
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 & S1.u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_AND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S0.u64 & S1.u64)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 | S1.u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_OR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S0.u64 | S1.u64)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 ^ S1.u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_XOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S0.u64 ^ S1.u64)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ANDN2_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 & ~S1.u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ANDN2_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S0.u64 & ~S1.u64)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ORN2_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 | ~S1.u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ORN2_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S0.u64 | ~S1.u64)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_NAND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ~(S0.u32 & S1.u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_NAND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = ~(S0.u64 & S1.u64)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_NOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ~(S0.u32 | S1.u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_NOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = ~(S0.u64 | S1.u64)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ~(S0.u32 ^ S1.u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_XNOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = ~(S0.u64 ^ S1.u64)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_LSHL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 << S1[4 : 0].u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_LSHL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S0.u64 << S1[5 : 0].u32)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_LSHR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 >> S1[4 : 0].u32)
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_LSHR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S0.u64 >> S1[5 : 0].u32)
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ASHR_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32)
|
|
SCC = Reg(D0.i32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ASHR_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32)
|
|
SCC = Reg(D0.i64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_BFM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_MUL_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = S0.i32 * S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1))
|
|
SCC = Reg(D0.u32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
# --- compiled pseudocode ---
|
|
tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1))
|
|
D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32)
|
|
SCC = Reg(D0.i32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_BFE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1))
|
|
SCC = Reg(D0.u64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_BFE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
# --- compiled pseudocode ---
|
|
tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1))
|
|
D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32)
|
|
SCC = Reg(D0.i64 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_ABSDIFF_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = S0.i32 - S1.i32
|
|
if D0.i32 < 0:
|
|
D0.i32 = -D0.i32
|
|
SCC = Reg(D0.i32 != 0)
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (((S0.u32) * (S1.u32)) >> 32)
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (((S0.i32) * (S1.i32)) >> 32)
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(((S0.u32) << 1) + (S1.u32))
|
|
SCC = Reg(((1) if (tmp >= 0x100000000) else (0)))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(((S0.u32) << 2) + (S1.u32))
|
|
SCC = Reg(((1) if (tmp >= 0x100000000) else (0)))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(((S0.u32) << 3) + (S1.u32))
|
|
SCC = Reg(((1) if (tmp >= 0x100000000) else (0)))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(((S0.u32) << 4) + (S1.u32))
|
|
SCC = Reg(((1) if (tmp >= 0x100000000) else (0)))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16))
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16))
|
|
return {'D0': D0}
|
|
|
|
def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16))
|
|
return {'D0': D0}
|
|
|
|
SOP2Op_FUNCTIONS = {
|
|
SOP2Op.S_ADD_U32: _SOP2Op_S_ADD_U32,
|
|
SOP2Op.S_SUB_U32: _SOP2Op_S_SUB_U32,
|
|
SOP2Op.S_ADD_I32: _SOP2Op_S_ADD_I32,
|
|
SOP2Op.S_SUB_I32: _SOP2Op_S_SUB_I32,
|
|
SOP2Op.S_ADDC_U32: _SOP2Op_S_ADDC_U32,
|
|
SOP2Op.S_SUBB_U32: _SOP2Op_S_SUBB_U32,
|
|
SOP2Op.S_MIN_I32: _SOP2Op_S_MIN_I32,
|
|
SOP2Op.S_MIN_U32: _SOP2Op_S_MIN_U32,
|
|
SOP2Op.S_MAX_I32: _SOP2Op_S_MAX_I32,
|
|
SOP2Op.S_MAX_U32: _SOP2Op_S_MAX_U32,
|
|
SOP2Op.S_CSELECT_B32: _SOP2Op_S_CSELECT_B32,
|
|
SOP2Op.S_CSELECT_B64: _SOP2Op_S_CSELECT_B64,
|
|
SOP2Op.S_AND_B32: _SOP2Op_S_AND_B32,
|
|
SOP2Op.S_AND_B64: _SOP2Op_S_AND_B64,
|
|
SOP2Op.S_OR_B32: _SOP2Op_S_OR_B32,
|
|
SOP2Op.S_OR_B64: _SOP2Op_S_OR_B64,
|
|
SOP2Op.S_XOR_B32: _SOP2Op_S_XOR_B32,
|
|
SOP2Op.S_XOR_B64: _SOP2Op_S_XOR_B64,
|
|
SOP2Op.S_ANDN2_B32: _SOP2Op_S_ANDN2_B32,
|
|
SOP2Op.S_ANDN2_B64: _SOP2Op_S_ANDN2_B64,
|
|
SOP2Op.S_ORN2_B32: _SOP2Op_S_ORN2_B32,
|
|
SOP2Op.S_ORN2_B64: _SOP2Op_S_ORN2_B64,
|
|
SOP2Op.S_NAND_B32: _SOP2Op_S_NAND_B32,
|
|
SOP2Op.S_NAND_B64: _SOP2Op_S_NAND_B64,
|
|
SOP2Op.S_NOR_B32: _SOP2Op_S_NOR_B32,
|
|
SOP2Op.S_NOR_B64: _SOP2Op_S_NOR_B64,
|
|
SOP2Op.S_XNOR_B32: _SOP2Op_S_XNOR_B32,
|
|
SOP2Op.S_XNOR_B64: _SOP2Op_S_XNOR_B64,
|
|
SOP2Op.S_LSHL_B32: _SOP2Op_S_LSHL_B32,
|
|
SOP2Op.S_LSHL_B64: _SOP2Op_S_LSHL_B64,
|
|
SOP2Op.S_LSHR_B32: _SOP2Op_S_LSHR_B32,
|
|
SOP2Op.S_LSHR_B64: _SOP2Op_S_LSHR_B64,
|
|
SOP2Op.S_ASHR_I32: _SOP2Op_S_ASHR_I32,
|
|
SOP2Op.S_ASHR_I64: _SOP2Op_S_ASHR_I64,
|
|
SOP2Op.S_BFM_B32: _SOP2Op_S_BFM_B32,
|
|
SOP2Op.S_BFM_B64: _SOP2Op_S_BFM_B64,
|
|
SOP2Op.S_MUL_I32: _SOP2Op_S_MUL_I32,
|
|
SOP2Op.S_BFE_U32: _SOP2Op_S_BFE_U32,
|
|
SOP2Op.S_BFE_I32: _SOP2Op_S_BFE_I32,
|
|
SOP2Op.S_BFE_U64: _SOP2Op_S_BFE_U64,
|
|
SOP2Op.S_BFE_I64: _SOP2Op_S_BFE_I64,
|
|
SOP2Op.S_ABSDIFF_I32: _SOP2Op_S_ABSDIFF_I32,
|
|
SOP2Op.S_MUL_HI_U32: _SOP2Op_S_MUL_HI_U32,
|
|
SOP2Op.S_MUL_HI_I32: _SOP2Op_S_MUL_HI_I32,
|
|
SOP2Op.S_LSHL1_ADD_U32: _SOP2Op_S_LSHL1_ADD_U32,
|
|
SOP2Op.S_LSHL2_ADD_U32: _SOP2Op_S_LSHL2_ADD_U32,
|
|
SOP2Op.S_LSHL3_ADD_U32: _SOP2Op_S_LSHL3_ADD_U32,
|
|
SOP2Op.S_LSHL4_ADD_U32: _SOP2Op_S_LSHL4_ADD_U32,
|
|
SOP2Op.S_PACK_LL_B32_B16: _SOP2Op_S_PACK_LL_B32_B16,
|
|
SOP2Op.S_PACK_LH_B32_B16: _SOP2Op_S_PACK_LH_B32_B16,
|
|
SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16,
|
|
}
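# Illustrative sketch only: S_ADD_U32 produces the low half of a 64-bit sum and leaves the carry-out
# in SCC, which S_ADDC_U32 then consumes for the high half. The helper name `_add64_example` is an
# assumption; only the two handlers above and Reg come from this module.
def _add64_example(a, b):
  lo = _SOP2Op_S_ADD_U32(Reg(a & 0xFFFFFFFF), Reg(b & 0xFFFFFFFF), Reg(0), Reg(0), Reg(0), Reg(0), 0, Reg(1), 0, None)
  hi = _SOP2Op_S_ADDC_U32(Reg(a >> 32), Reg(b >> 32), Reg(0), Reg(0), lo['SCC'], Reg(0), 0, Reg(1), 0, None)
  return lo['D0'], hi['D0'], hi['SCC']  # low 32 bits, high 32 bits, final carry-out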
|
|
|
|
def _SOPCOp_S_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 == S1.i32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 != S1.i32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 > S1.i32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 >= S1.i32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 < S1.i32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 <= S1.i32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 == S1.u32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 != S1.u32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 > S1.u32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 >= S1.u32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 < S1.u32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 <= S1.u32)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_BITCMP0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_BITCMP1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_BITCMP0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_BITCMP1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u64 == S1.u64)
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPCOp_S_CMP_LG_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u64 != S1.u64)
|
|
return {'SCC': SCC}
|
|
|
|
SOPCOp_FUNCTIONS = {
|
|
SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32,
|
|
SOPCOp.S_CMP_LG_I32: _SOPCOp_S_CMP_LG_I32,
|
|
SOPCOp.S_CMP_GT_I32: _SOPCOp_S_CMP_GT_I32,
|
|
SOPCOp.S_CMP_GE_I32: _SOPCOp_S_CMP_GE_I32,
|
|
SOPCOp.S_CMP_LT_I32: _SOPCOp_S_CMP_LT_I32,
|
|
SOPCOp.S_CMP_LE_I32: _SOPCOp_S_CMP_LE_I32,
|
|
SOPCOp.S_CMP_EQ_U32: _SOPCOp_S_CMP_EQ_U32,
|
|
SOPCOp.S_CMP_LG_U32: _SOPCOp_S_CMP_LG_U32,
|
|
SOPCOp.S_CMP_GT_U32: _SOPCOp_S_CMP_GT_U32,
|
|
SOPCOp.S_CMP_GE_U32: _SOPCOp_S_CMP_GE_U32,
|
|
SOPCOp.S_CMP_LT_U32: _SOPCOp_S_CMP_LT_U32,
|
|
SOPCOp.S_CMP_LE_U32: _SOPCOp_S_CMP_LE_U32,
|
|
SOPCOp.S_BITCMP0_B32: _SOPCOp_S_BITCMP0_B32,
|
|
SOPCOp.S_BITCMP1_B32: _SOPCOp_S_BITCMP1_B32,
|
|
SOPCOp.S_BITCMP0_B64: _SOPCOp_S_BITCMP0_B64,
|
|
SOPCOp.S_BITCMP1_B64: _SOPCOp_S_BITCMP1_B64,
|
|
SOPCOp.S_CMP_EQ_U64: _SOPCOp_S_CMP_EQ_U64,
|
|
SOPCOp.S_CMP_LG_U64: _SOPCOp_S_CMP_LG_U64,
|
|
}
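# Note: SOPC compare handlers write only SCC; the result is consumed by a following
# S_CSELECT_B32/B64 (SOP2, above) or S_CBRANCH_SCC0/SCC1 (SOPP, below).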
|
|
|
|
def _SOPKOp_S_MOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (signext(S0.i16))
|
|
return {'D0': D0}
|
|
|
|
def _SOPKOp_S_CMOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if SCC:
|
|
D0.i32 = (signext(S0.i16))
|
|
return {'D0': D0}
|
|
|
|
def _SOPKOp_S_CMPK_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 == (signext(S1.i16)))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 != (signext(S1.i16)))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 > (signext(S1.i16)))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 >= (signext(S1.i16)))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 < (signext(S1.i16)))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.i32 <= (signext(S1.i16)))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 == (S1.u16))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 != (S1.u16))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 > (S1.u16))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 >= (S1.u16))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 < (S1.u16))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_CMPK_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SCC = Reg(S0.u32 <= (S1.u16))
|
|
return {'SCC': SCC}
|
|
|
|
def _SOPKOp_S_ADDK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.i32)
|
|
D0.i32 = D0.i32 + (signext(S0.i16))
|
|
SCC = Reg(((tmp[31] == S0.i16[15]) and (tmp[31] != D0.i32[31])))
|
|
return {'D0': D0, 'SCC': SCC}
|
|
|
|
def _SOPKOp_S_MULK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = D0.i32 * (signext(S0.i16))
|
|
return {'D0': D0}
|
|
|
|
def _SOPKOp_S_CALL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
D0.i64 = PC + 4
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
return {'D0': D0, 'PC': PC}
|
|
|
|
SOPKOp_FUNCTIONS = {
|
|
SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32,
|
|
SOPKOp.S_CMOVK_I32: _SOPKOp_S_CMOVK_I32,
|
|
SOPKOp.S_CMPK_EQ_I32: _SOPKOp_S_CMPK_EQ_I32,
|
|
SOPKOp.S_CMPK_LG_I32: _SOPKOp_S_CMPK_LG_I32,
|
|
SOPKOp.S_CMPK_GT_I32: _SOPKOp_S_CMPK_GT_I32,
|
|
SOPKOp.S_CMPK_GE_I32: _SOPKOp_S_CMPK_GE_I32,
|
|
SOPKOp.S_CMPK_LT_I32: _SOPKOp_S_CMPK_LT_I32,
|
|
SOPKOp.S_CMPK_LE_I32: _SOPKOp_S_CMPK_LE_I32,
|
|
SOPKOp.S_CMPK_EQ_U32: _SOPKOp_S_CMPK_EQ_U32,
|
|
SOPKOp.S_CMPK_LG_U32: _SOPKOp_S_CMPK_LG_U32,
|
|
SOPKOp.S_CMPK_GT_U32: _SOPKOp_S_CMPK_GT_U32,
|
|
SOPKOp.S_CMPK_GE_U32: _SOPKOp_S_CMPK_GE_U32,
|
|
SOPKOp.S_CMPK_LT_U32: _SOPKOp_S_CMPK_LT_U32,
|
|
SOPKOp.S_CMPK_LE_U32: _SOPKOp_S_CMPK_LE_U32,
|
|
SOPKOp.S_ADDK_I32: _SOPKOp_S_ADDK_I32,
|
|
SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32,
|
|
SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64,
|
|
}
|
|
|
|
def _SOPPOp_S_NOP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
for i in range(0, int(SIMM16.u16[3 : 0].u32)+1):
|
|
pass
|
|
return {}
|
|
|
|
def _SOPPOp_S_BRANCH(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_SCC0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
if SCC == 0:
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'SCC': SCC, 'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_SCC1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
if SCC == 1:
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'SCC': SCC, 'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_VCCZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
VCCZ = Reg(1 if VCC._val == 0 else 0)
|
|
# --- compiled pseudocode ---
|
|
if VCCZ.u1 == 1:
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_VCCNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
VCCZ = Reg(1 if VCC._val == 0 else 0)
|
|
# --- compiled pseudocode ---
|
|
if VCCZ.u1 == 0:
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_EXECZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
EXECZ = Reg(1 if EXEC._val == 0 else 0)
|
|
# --- compiled pseudocode ---
|
|
if EXECZ.u1 == 1:
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_EXECNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
EXECZ = Reg(1 if EXEC._val == 0 else 0)
|
|
# --- compiled pseudocode ---
|
|
if EXECZ.u1 == 0:
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_TRAP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_CDBGSYS(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
if WAVE_STATUS.COND_DBG_SYS.u32 != 0:
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_CDBGUSER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
if WAVE_STATUS.COND_DBG_USER.u32 != 0:
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER):
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER):
|
|
PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
|
|
else:
|
|
PC = Reg(PC + 4)
|
|
return {'PC': PC}
|
|
|
|
def _SOPPOp_S_SET_GPR_IDX_MODE(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM16 = Reg(literal)
|
|
VDST = Reg(vdst_idx)
|
|
# --- compiled pseudocode ---
|
|
# ISA pseudocode, not executable Python: SIMM16[1] = VSRC1_REL, SIMM16[2] = VSRC2_REL and SIMM16[3] = VDST_REL.
|
|
return {}
|
|
|
|
SOPPOp_FUNCTIONS = {
|
|
SOPPOp.S_NOP: _SOPPOp_S_NOP,
|
|
SOPPOp.S_BRANCH: _SOPPOp_S_BRANCH,
|
|
SOPPOp.S_CBRANCH_SCC0: _SOPPOp_S_CBRANCH_SCC0,
|
|
SOPPOp.S_CBRANCH_SCC1: _SOPPOp_S_CBRANCH_SCC1,
|
|
SOPPOp.S_CBRANCH_VCCZ: _SOPPOp_S_CBRANCH_VCCZ,
|
|
SOPPOp.S_CBRANCH_VCCNZ: _SOPPOp_S_CBRANCH_VCCNZ,
|
|
SOPPOp.S_CBRANCH_EXECZ: _SOPPOp_S_CBRANCH_EXECZ,
|
|
SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ,
|
|
SOPPOp.S_TRAP: _SOPPOp_S_TRAP,
|
|
SOPPOp.S_CBRANCH_CDBGSYS: _SOPPOp_S_CBRANCH_CDBGSYS,
|
|
SOPPOp.S_CBRANCH_CDBGUSER: _SOPPOp_S_CBRANCH_CDBGUSER,
|
|
SOPPOp.S_CBRANCH_CDBGSYS_OR_USER: _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER,
|
|
SOPPOp.S_CBRANCH_CDBGSYS_AND_USER: _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER,
|
|
SOPPOp.S_SET_GPR_IDX_MODE: _SOPPOp_S_SET_GPR_IDX_MODE,
|
|
}
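# Worked example (comment only): the branch handlers above compute
#   PC_next = PC + 4 + 4 * SIMM16    (SIMM16 is a signed word offset from the next instruction)
# so SIMM16 = 0 falls through to PC + 4, SIMM16 = -2 lands 8 bytes before the next instruction,
# and SIMM16 = 3 skips 12 bytes past it.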
|
|
|
|
def _VOP1Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.b32 = S0.b32
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
if EXEC == 0x0:
|
|
lane = 0
|
|
else:
|
|
lane = s_ff1_i32_b64(EXEC)
|
|
D0.b32 = VGPR[lane][SRC0.u32]
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = f64_to_i32(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = i32_to_f64(S0.i32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = i32_to_f32(S0.i32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = f32_to_u32(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = f32_to_i32(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = f32_to_f16(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = f16_to_f32(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_RPI_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = f32_to_i32(floor(S0.f32 + 0.5))
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_FLR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = f32_to_i32(floor(S0.f32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = f64_to_f32(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = f32_to_f64(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0[7 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0[15 : 8].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0[23 : 16].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0[31 : 24].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = f64_to_u32(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = u32_to_f64(S0.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = trunc(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = trunc(S0.f64)
|
|
if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)):
|
|
D0.f64 += 1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = floor(S0.f64 + 0.5)
|
|
if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)):
|
|
D0.f64 -= 1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = trunc(S0.f64)
|
|
if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)):
|
|
D0.f64 += -1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 + -floor(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = trunc(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = trunc(S0.f32)
|
|
if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)):
|
|
D0.f32 += 1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = floor(S0.f32 + 0.5)
|
|
if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)):
|
|
D0.f32 -= 1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = trunc(S0.f32)
|
|
if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)):
|
|
D0.f32 += -1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = pow(2.0, S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = log2(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = 1.0 / S0.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = 1.0 / S0.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = 1.0 / sqrt(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = 1.0 / S0.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = 1.0 / sqrt(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = sqrt(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = sqrt(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = sin(S0.f32 * F(PI * 2.0))
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = cos(S0.f32 * F(PI * 2.0))
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ~S0.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32[31 : 0] = S0.u32[0 : 31]
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FFBH_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = -1
|
|
for i in range(0, int(31)+1):
|
|
if S0.u32[31 - i] == 1:
|
|
D0.i32 = i; break
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FFBL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = -1
|
|
for i in range(0, int(31)+1):
|
|
if S0.u32[i] == 1:
|
|
D0.i32 = i; break
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FFBH_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = -1
|
|
for i in range(1, int(31)+1):
|
|
if S0.i32[31 - i] != S0.i32[31]:
|
|
D0.i32 = i; break
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)):
|
|
D0.i32 = 0
|
|
else:
|
|
D0.i32 = exponent(S0.f64) - 1023 + 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)):
|
|
D0.f64 = S0.f64
|
|
else:
|
|
D0.f64 = mantissa(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = S0.f64 + -floor(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))):
|
|
D0.i32 = 0
|
|
else:
|
|
D0.i32 = exponent(S0.f32) - 127 + 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))):
|
|
D0.f32 = S0.f32
|
|
else:
|
|
D0.f32 = mantissa(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.b64 = S0.b64
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = u16_to_f16(S0.u16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = i16_to_f16(S0.i16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = f16_to_u16(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = f16_to_i16(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = 1.0 / S0.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = sqrt(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = 1.0 / sqrt(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = log2(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = pow(2.0, S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP1Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))):
    D0.f16 = S0.f16
  else:
    D0.f16 = mantissa(S0.f16)
  return {'D0': D0}

def _VOP1Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))):
    D0.i16 = 0
  else:
    D0.i16 = (exponent(S0.f16) - 15 + 1)
  return {'D0': D0}

def _VOP1Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.f16 = trunc(S0.f16)
  if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)):
    D0.f16 += -1.0
  return {'D0': D0}

def _VOP1Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.f16 = trunc(S0.f16)
  if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)):
    D0.f16 += 1.0
  return {'D0': D0}

def _VOP1Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.f16 = trunc(S0.f16)
  return {'D0': D0}

def _VOP1Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.f16 = floor(S0.f16 + 0.5)
  if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)):
    D0.f16 -= 1.0
  return {'D0': D0}

def _VOP1Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.f16 = S0.f16 + -floor(S0.f16)
  return {'D0': D0}

def _VOP1Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.f16 = sin(S0.f16 * F(PI * 2.0))
  return {'D0': D0}

def _VOP1Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.f16 = cos(S0.f16 * F(PI * 2.0))
  return {'D0': D0}

def _VOP1Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.i16 = f16_to_snorm(S0.f16)
  return {'D0': D0}

def _VOP1Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u16 = f16_to_unorm(S0.f16)
  return {'D0': D0}

def _VOP1Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16)
  tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16)
  D0.b16 = tmp.b16
  return {'D0': D0}

def _VOP1Op_V_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(D0.b32)
  D0.b32 = S0.b32
  S0.b32 = tmp
  return {'D0': D0}

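# Note: fp8_to_f32 / bf8_to_f32 and the SDWA byte/word selector values (SDWA_SRC0_SEL, BYTE1..BYTE3, WORD1)
# used by the conversion ops below are not in the import list at the top of this file; they are assumed to be
# supplied by the pcode runtime environment when these handlers are executed.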
def _VOP1Op_V_CVT_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  SRC0 = Reg(src0_idx)
  # --- compiled pseudocode ---
  if SDWA_SRC0_SEL == BYTE1.b3:
    D0.f32 = fp8_to_f32(S0[15 : 8].fp8)
  elif SDWA_SRC0_SEL == BYTE2.b3:
    D0.f32 = fp8_to_f32(S0[23 : 16].fp8)
  elif SDWA_SRC0_SEL == BYTE3.b3:
    D0.f32 = fp8_to_f32(S0[31 : 24].fp8)
  else:
    D0.f32 = fp8_to_f32(S0[7 : 0].fp8)
  return {'D0': D0}

def _VOP1Op_V_CVT_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  SRC0 = Reg(src0_idx)
  # --- compiled pseudocode ---
  if SDWA_SRC0_SEL == BYTE1.b3:
    D0.f32 = bf8_to_f32(S0[15 : 8].bf8)
  elif SDWA_SRC0_SEL == BYTE2.b3:
    D0.f32 = bf8_to_f32(S0[23 : 16].bf8)
  elif SDWA_SRC0_SEL == BYTE3.b3:
    D0.f32 = bf8_to_f32(S0[31 : 24].bf8)
  else:
    D0.f32 = bf8_to_f32(S0[7 : 0].bf8)
  return {'D0': D0}

def _VOP1Op_V_CVT_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D1 = Reg(0)
  SRC0 = Reg(src0_idx)
  # --- compiled pseudocode ---
  tmp = Reg(((S0[31 : 16]) if (SDWA_SRC0_SEL[1 : 0] == WORD1.b2) else (S0[15 : 0])))
  D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8)
  D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8)
  return {'D0': D0}

def _VOP1Op_V_CVT_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D1 = Reg(0)
  SRC0 = Reg(src0_idx)
  # --- compiled pseudocode ---
  tmp = Reg(((S0[31 : 16]) if (SDWA_SRC0_SEL[1 : 0] == WORD1.b2) else (S0[15 : 0])))
  D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8)
  D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8)
  return {'D0': D0}

def _VOP1Op_V_CVT_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.f32 = F(_pack(S0.b16, 0))
  return {'D0': D0}

VOP1Op_FUNCTIONS = {
  VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32,
  VOP1Op.V_READFIRSTLANE_B32: _VOP1Op_V_READFIRSTLANE_B32,
  VOP1Op.V_CVT_I32_F64: _VOP1Op_V_CVT_I32_F64,
  VOP1Op.V_CVT_F64_I32: _VOP1Op_V_CVT_F64_I32,
  VOP1Op.V_CVT_F32_I32: _VOP1Op_V_CVT_F32_I32,
  VOP1Op.V_CVT_F32_U32: _VOP1Op_V_CVT_F32_U32,
  VOP1Op.V_CVT_U32_F32: _VOP1Op_V_CVT_U32_F32,
  VOP1Op.V_CVT_I32_F32: _VOP1Op_V_CVT_I32_F32,
  VOP1Op.V_CVT_F16_F32: _VOP1Op_V_CVT_F16_F32,
  VOP1Op.V_CVT_F32_F16: _VOP1Op_V_CVT_F32_F16,
  VOP1Op.V_CVT_RPI_I32_F32: _VOP1Op_V_CVT_RPI_I32_F32,
  VOP1Op.V_CVT_FLR_I32_F32: _VOP1Op_V_CVT_FLR_I32_F32,
  VOP1Op.V_CVT_F32_F64: _VOP1Op_V_CVT_F32_F64,
  VOP1Op.V_CVT_F64_F32: _VOP1Op_V_CVT_F64_F32,
  VOP1Op.V_CVT_F32_UBYTE0: _VOP1Op_V_CVT_F32_UBYTE0,
  VOP1Op.V_CVT_F32_UBYTE1: _VOP1Op_V_CVT_F32_UBYTE1,
  VOP1Op.V_CVT_F32_UBYTE2: _VOP1Op_V_CVT_F32_UBYTE2,
  VOP1Op.V_CVT_F32_UBYTE3: _VOP1Op_V_CVT_F32_UBYTE3,
  VOP1Op.V_CVT_U32_F64: _VOP1Op_V_CVT_U32_F64,
  VOP1Op.V_CVT_F64_U32: _VOP1Op_V_CVT_F64_U32,
  VOP1Op.V_TRUNC_F64: _VOP1Op_V_TRUNC_F64,
  VOP1Op.V_CEIL_F64: _VOP1Op_V_CEIL_F64,
  VOP1Op.V_RNDNE_F64: _VOP1Op_V_RNDNE_F64,
  VOP1Op.V_FLOOR_F64: _VOP1Op_V_FLOOR_F64,
  VOP1Op.V_FRACT_F32: _VOP1Op_V_FRACT_F32,
  VOP1Op.V_TRUNC_F32: _VOP1Op_V_TRUNC_F32,
  VOP1Op.V_CEIL_F32: _VOP1Op_V_CEIL_F32,
  VOP1Op.V_RNDNE_F32: _VOP1Op_V_RNDNE_F32,
  VOP1Op.V_FLOOR_F32: _VOP1Op_V_FLOOR_F32,
  VOP1Op.V_EXP_F32: _VOP1Op_V_EXP_F32,
  VOP1Op.V_LOG_F32: _VOP1Op_V_LOG_F32,
  VOP1Op.V_RCP_F32: _VOP1Op_V_RCP_F32,
  VOP1Op.V_RCP_IFLAG_F32: _VOP1Op_V_RCP_IFLAG_F32,
  VOP1Op.V_RSQ_F32: _VOP1Op_V_RSQ_F32,
  VOP1Op.V_RCP_F64: _VOP1Op_V_RCP_F64,
  VOP1Op.V_RSQ_F64: _VOP1Op_V_RSQ_F64,
  VOP1Op.V_SQRT_F32: _VOP1Op_V_SQRT_F32,
  VOP1Op.V_SQRT_F64: _VOP1Op_V_SQRT_F64,
  VOP1Op.V_SIN_F32: _VOP1Op_V_SIN_F32,
  VOP1Op.V_COS_F32: _VOP1Op_V_COS_F32,
  VOP1Op.V_NOT_B32: _VOP1Op_V_NOT_B32,
  VOP1Op.V_BFREV_B32: _VOP1Op_V_BFREV_B32,
  VOP1Op.V_FFBH_U32: _VOP1Op_V_FFBH_U32,
  VOP1Op.V_FFBL_B32: _VOP1Op_V_FFBL_B32,
  VOP1Op.V_FFBH_I32: _VOP1Op_V_FFBH_I32,
  VOP1Op.V_FREXP_EXP_I32_F64: _VOP1Op_V_FREXP_EXP_I32_F64,
  VOP1Op.V_FREXP_MANT_F64: _VOP1Op_V_FREXP_MANT_F64,
  VOP1Op.V_FRACT_F64: _VOP1Op_V_FRACT_F64,
  VOP1Op.V_FREXP_EXP_I32_F32: _VOP1Op_V_FREXP_EXP_I32_F32,
  VOP1Op.V_FREXP_MANT_F32: _VOP1Op_V_FREXP_MANT_F32,
  VOP1Op.V_MOV_B64: _VOP1Op_V_MOV_B64,
  VOP1Op.V_CVT_F16_U16: _VOP1Op_V_CVT_F16_U16,
  VOP1Op.V_CVT_F16_I16: _VOP1Op_V_CVT_F16_I16,
  VOP1Op.V_CVT_U16_F16: _VOP1Op_V_CVT_U16_F16,
  VOP1Op.V_CVT_I16_F16: _VOP1Op_V_CVT_I16_F16,
  VOP1Op.V_RCP_F16: _VOP1Op_V_RCP_F16,
  VOP1Op.V_SQRT_F16: _VOP1Op_V_SQRT_F16,
  VOP1Op.V_RSQ_F16: _VOP1Op_V_RSQ_F16,
  VOP1Op.V_LOG_F16: _VOP1Op_V_LOG_F16,
  VOP1Op.V_EXP_F16: _VOP1Op_V_EXP_F16,
  VOP1Op.V_FREXP_MANT_F16: _VOP1Op_V_FREXP_MANT_F16,
  VOP1Op.V_FREXP_EXP_I16_F16: _VOP1Op_V_FREXP_EXP_I16_F16,
  VOP1Op.V_FLOOR_F16: _VOP1Op_V_FLOOR_F16,
  VOP1Op.V_CEIL_F16: _VOP1Op_V_CEIL_F16,
  VOP1Op.V_TRUNC_F16: _VOP1Op_V_TRUNC_F16,
  VOP1Op.V_RNDNE_F16: _VOP1Op_V_RNDNE_F16,
  VOP1Op.V_FRACT_F16: _VOP1Op_V_FRACT_F16,
  VOP1Op.V_SIN_F16: _VOP1Op_V_SIN_F16,
  VOP1Op.V_COS_F16: _VOP1Op_V_COS_F16,
  VOP1Op.V_CVT_NORM_I16_F16: _VOP1Op_V_CVT_NORM_I16_F16,
  VOP1Op.V_CVT_NORM_U16_F16: _VOP1Op_V_CVT_NORM_U16_F16,
  VOP1Op.V_SAT_PK_U8_I16: _VOP1Op_V_SAT_PK_U8_I16,
  VOP1Op.V_SWAP_B32: _VOP1Op_V_SWAP_B32,
  VOP1Op.V_CVT_F32_FP8: _VOP1Op_V_CVT_F32_FP8,
  VOP1Op.V_CVT_F32_BF8: _VOP1Op_V_CVT_F32_BF8,
  VOP1Op.V_CVT_PK_F32_FP8: _VOP1Op_V_CVT_PK_F32_FP8,
  VOP1Op.V_CVT_PK_F32_BF8: _VOP1Op_V_CVT_PK_F32_BF8,
  VOP1Op.V_CVT_F32_BF16: _VOP1Op_V_CVT_F32_BF16,
}

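# Dispatch sketch (illustrative only, not part of the generated tables): an emulator can look up the
# handler for a decoded VOP1 opcode and run it per lane; the register objects below are placeholders.
#   handler = VOP1Op_FUNCTIONS[VOP1Op.V_CVT_F16_U16]
#   outs = handler(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR)
#   D0 = outs.get('D0', D0)
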
def _VOP2Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 + S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 - S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S1.f32 - S0.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_FMAC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = fma(S0.f64, S1.f64, D0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 * S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (S0.i24) * (S1.i24)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (((S0.i24) * (S1.i24)) >> 32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u24) * (S1.u24)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (((S0.u24) * (S1.u24)) >> 32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))):
|
|
D0.f32 = F(cvtToQuietNAN(F(S0.f32)))
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))):
|
|
D0.f32 = F(cvtToQuietNAN(F(S1.f32)))
|
|
elif isNAN(F(S0.f32)):
|
|
D0.f32 = S1.f32
|
|
elif isNAN(F(S1.f32)):
|
|
D0.f32 = S0.f32
|
|
elif ((F(S0.f32) == +0.0) and (F(S1.f32) == -0.0)):
|
|
D0.f32 = S1.f32
|
|
elif ((F(S0.f32) == -0.0) and (F(S1.f32) == +0.0)):
|
|
D0.f32 = S0.f32
|
|
else:
|
|
D0.f32 = ((S0.f32) if (S0.f32 < S1.f32) else (S1.f32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))):
|
|
D0.f32 = F(cvtToQuietNAN(F(S0.f32)))
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))):
|
|
D0.f32 = F(cvtToQuietNAN(F(S1.f32)))
|
|
elif isNAN(F(S0.f32)):
|
|
D0.f32 = S1.f32
|
|
elif isNAN(F(S1.f32)):
|
|
D0.f32 = S0.f32
|
|
elif ((F(S0.f32) == +0.0) and (F(S1.f32) == -0.0)):
|
|
D0.f32 = S0.f32
|
|
elif ((F(S0.f32) == -0.0) and (F(S1.f32) == +0.0)):
|
|
D0.f32 = S1.f32
|
|
elif WAVE_MODE.IEEE:
|
|
D0.f32 = ((S0.f32) if (S0.f32 >= S1.f32) else (S1.f32))
|
|
else:
|
|
D0.f32 = ((S0.f32) if (S0.f32 > S1.f32) else (S1.f32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S1.u32 >> S0[4 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (S1.i32 >> S0[4 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S1.u32 << S0[4 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 & S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 | S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 ^ S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM32 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SIMM32 = Reg(literal)
|
|
# --- compiled pseudocode ---
|
|
D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg((S0.u32) + (S1.u32))
|
|
VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP2Op_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.u32 - S1.u32)
|
|
VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP2Op_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S1.u32 - S0.u32)
|
|
VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP2Op_V_ADDC_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId])
|
|
VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP2Op_V_SUBB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId])
|
|
VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP2Op_V_SUBBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId])
|
|
VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP2Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 + S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 - S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S1.f16 - S0.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 * S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.f16 * S1.f16 + D0.f16)
|
|
if OPSEL.u4[3]:
|
|
D0 = Reg(_pack(tmp.f16, D0[15 : 0]))
|
|
else:
|
|
D0 = Reg(_pack(0, tmp.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MADMK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  SIMM16 = Reg(literal)
  # --- compiled pseudocode ---
  tmp = Reg(S0.f16 * SIMM16.f16 + S1.f16)
  D0.f16 = tmp.f16
  return {'D0': D0}

def _VOP2Op_V_MADAK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  SIMM16 = Reg(literal)
  # --- compiled pseudocode ---
  tmp = Reg(S0.f16 * S1.f16 + SIMM16.f16)
  D0.f16 = tmp.f16
  return {'D0': D0}

def _VOP2Op_V_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S0.u16 + S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S0.u16 - S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_SUBREV_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S1.u16 - S0.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S0.u16 * S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = (S1.u16 << S0[3 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = (S1.u16 >> S0[3 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = (S1.i16 >> S0[3 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))):
|
|
D0.f16 = F(cvtToQuietNAN(F(S0.f16)))
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))):
|
|
D0.f16 = F(cvtToQuietNAN(F(S1.f16)))
|
|
elif isNAN(F(S0.f16)):
|
|
D0.f16 = S1.f16
|
|
elif isNAN(F(S1.f16)):
|
|
D0.f16 = S0.f16
|
|
elif ((F(S0.f16) == +0.0) and (F(S1.f16) == -0.0)):
|
|
D0.f16 = S0.f16
|
|
elif ((F(S0.f16) == -0.0) and (F(S1.f16) == +0.0)):
|
|
D0.f16 = S1.f16
|
|
elif WAVE_MODE.IEEE:
|
|
D0.f16 = ((S0.f16) if (S0.f16 >= S1.f16) else (S1.f16))
|
|
else:
|
|
D0.f16 = ((S0.f16) if (S0.f16 > S1.f16) else (S1.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))):
|
|
D0.f16 = F(cvtToQuietNAN(F(S0.f16)))
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))):
|
|
D0.f16 = F(cvtToQuietNAN(F(S1.f16)))
|
|
elif isNAN(F(S0.f16)):
|
|
D0.f16 = S1.f16
|
|
elif isNAN(F(S1.f16)):
|
|
D0.f16 = S0.f16
|
|
elif ((F(S0.f16) == +0.0) and (F(S1.f16) == -0.0)):
|
|
D0.f16 = S1.f16
|
|
elif ((F(S0.f16) == -0.0) and (F(S1.f16) == +0.0)):
|
|
D0.f16 = S0.f16
|
|
else:
|
|
D0.f16 = ((S0.f16) if (S0.f16 < S1.f16) else (S1.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 * F(2.0 ** (S1.i16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = S0.u32 + S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = S0.u32 - S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_SUBREV_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = S1.u32 - S0.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_DOT2C_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.f32)
|
|
tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16)
|
|
tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16)
|
|
D0.f32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_DOT2C_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.i32)
|
|
tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16)
|
|
tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16)
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_DOT4C_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.i32)
|
|
tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8)
|
|
tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8)
|
|
tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8)
|
|
tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8)
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_DOT8C_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.i32)
|
|
tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4)
|
|
tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4)
|
|
tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4)
|
|
tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4)
|
|
tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4)
|
|
tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4)
|
|
tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4)
|
|
tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4)
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = fma(S0.f32, S1.f32, D0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16)
|
|
D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ~(S0.u32 ^ S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP2Op_V_DOT2C_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.f32)
|
|
tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16)
|
|
tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16)
|
|
D0.f32 = tmp
|
|
return {'D0': D0}
|
|
|
|
VOP2Op_FUNCTIONS = {
  VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32,
  VOP2Op.V_ADD_F32: _VOP2Op_V_ADD_F32,
  VOP2Op.V_SUB_F32: _VOP2Op_V_SUB_F32,
  VOP2Op.V_SUBREV_F32: _VOP2Op_V_SUBREV_F32,
  VOP2Op.V_FMAC_F64: _VOP2Op_V_FMAC_F64,
  VOP2Op.V_MUL_F32: _VOP2Op_V_MUL_F32,
  VOP2Op.V_MUL_I32_I24: _VOP2Op_V_MUL_I32_I24,
  VOP2Op.V_MUL_HI_I32_I24: _VOP2Op_V_MUL_HI_I32_I24,
  VOP2Op.V_MUL_U32_U24: _VOP2Op_V_MUL_U32_U24,
  VOP2Op.V_MUL_HI_U32_U24: _VOP2Op_V_MUL_HI_U32_U24,
  VOP2Op.V_MIN_F32: _VOP2Op_V_MIN_F32,
  VOP2Op.V_MAX_F32: _VOP2Op_V_MAX_F32,
  VOP2Op.V_MIN_I32: _VOP2Op_V_MIN_I32,
  VOP2Op.V_MAX_I32: _VOP2Op_V_MAX_I32,
  VOP2Op.V_MIN_U32: _VOP2Op_V_MIN_U32,
  VOP2Op.V_MAX_U32: _VOP2Op_V_MAX_U32,
  VOP2Op.V_LSHRREV_B32: _VOP2Op_V_LSHRREV_B32,
  VOP2Op.V_ASHRREV_I32: _VOP2Op_V_ASHRREV_I32,
  VOP2Op.V_LSHLREV_B32: _VOP2Op_V_LSHLREV_B32,
  VOP2Op.V_AND_B32: _VOP2Op_V_AND_B32,
  VOP2Op.V_OR_B32: _VOP2Op_V_OR_B32,
  VOP2Op.V_XOR_B32: _VOP2Op_V_XOR_B32,
  VOP2Op.V_FMAMK_F32: _VOP2Op_V_FMAMK_F32,
  VOP2Op.V_FMAAK_F32: _VOP2Op_V_FMAAK_F32,
  VOP2Op.V_ADD_CO_U32: _VOP2Op_V_ADD_CO_U32,
  VOP2Op.V_SUB_CO_U32: _VOP2Op_V_SUB_CO_U32,
  VOP2Op.V_SUBREV_CO_U32: _VOP2Op_V_SUBREV_CO_U32,
  VOP2Op.V_ADDC_CO_U32: _VOP2Op_V_ADDC_CO_U32,
  VOP2Op.V_SUBB_CO_U32: _VOP2Op_V_SUBB_CO_U32,
  VOP2Op.V_SUBBREV_CO_U32: _VOP2Op_V_SUBBREV_CO_U32,
  VOP2Op.V_ADD_F16: _VOP2Op_V_ADD_F16,
  VOP2Op.V_SUB_F16: _VOP2Op_V_SUB_F16,
  VOP2Op.V_SUBREV_F16: _VOP2Op_V_SUBREV_F16,
  VOP2Op.V_MUL_F16: _VOP2Op_V_MUL_F16,
  VOP2Op.V_MAC_F16: _VOP2Op_V_MAC_F16,
  VOP2Op.V_MADMK_F16: _VOP2Op_V_MADMK_F16,
  VOP2Op.V_MADAK_F16: _VOP2Op_V_MADAK_F16,
  VOP2Op.V_ADD_U16: _VOP2Op_V_ADD_U16,
  VOP2Op.V_SUB_U16: _VOP2Op_V_SUB_U16,
  VOP2Op.V_SUBREV_U16: _VOP2Op_V_SUBREV_U16,
  VOP2Op.V_MUL_LO_U16: _VOP2Op_V_MUL_LO_U16,
  VOP2Op.V_LSHLREV_B16: _VOP2Op_V_LSHLREV_B16,
  VOP2Op.V_LSHRREV_B16: _VOP2Op_V_LSHRREV_B16,
  VOP2Op.V_ASHRREV_I16: _VOP2Op_V_ASHRREV_I16,
  VOP2Op.V_MAX_F16: _VOP2Op_V_MAX_F16,
  VOP2Op.V_MIN_F16: _VOP2Op_V_MIN_F16,
  VOP2Op.V_MAX_U16: _VOP2Op_V_MAX_U16,
  VOP2Op.V_MAX_I16: _VOP2Op_V_MAX_I16,
  VOP2Op.V_MIN_U16: _VOP2Op_V_MIN_U16,
  VOP2Op.V_MIN_I16: _VOP2Op_V_MIN_I16,
  VOP2Op.V_LDEXP_F16: _VOP2Op_V_LDEXP_F16,
  VOP2Op.V_ADD_U32: _VOP2Op_V_ADD_U32,
  VOP2Op.V_SUB_U32: _VOP2Op_V_SUB_U32,
  VOP2Op.V_SUBREV_U32: _VOP2Op_V_SUBREV_U32,
  VOP2Op.V_DOT2C_F32_F16: _VOP2Op_V_DOT2C_F32_F16,
  VOP2Op.V_DOT2C_I32_I16: _VOP2Op_V_DOT2C_I32_I16,
  VOP2Op.V_DOT4C_I32_I8: _VOP2Op_V_DOT4C_I32_I8,
  VOP2Op.V_DOT8C_I32_I4: _VOP2Op_V_DOT8C_I32_I4,
  VOP2Op.V_FMAC_F32: _VOP2Op_V_FMAC_F32,
  VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16,
  VOP2Op.V_XNOR_B32: _VOP2Op_V_XNOR_B32,
  VOP2Op.V_DOT2C_F32_BF16: _VOP2Op_V_DOT2C_F32_BF16,
}

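# The VOP3P packed ops below compute each half of the result independently: the 16-bit ops use the
# low half [15:0] and the high half [31:16] of each 32-bit source, while the PK_*_F32 ops pair the
# 32-bit halves ([31:0] and [63:32]) of a 64-bit register pair.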
def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16
  tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16
  tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16
  D0.b32 = tmp.b32
  return {'D0': D0}

def _VOP3POp_V_PK_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16
  tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16
  tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32)
  tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32)
  D0.b32 = tmp.b32
  return {'D0': D0}

def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32)
  tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32)
  D0.b32 = tmp.b32
  return {'D0': D0}

def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32)
  tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32)
  D0.b32 = tmp.b32
  return {'D0': D0}

def _VOP3POp_V_PK_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16))
  tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16))
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16))
  tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16))
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16
  tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16
  tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16
  tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16))
  tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16))
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16))
  tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16))
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)
  tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16
  tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16
  tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16)
  tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16)
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16)
  tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16)
  D0.b32 = tmp
  return {'D0': D0}

def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(S2.f32)
  tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16)
  tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16)
  D0.f32 = tmp
  return {'D0': D0}

def _VOP3POp_V_DOT2_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(S2.i32)
  tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16)
  tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16)
  D0.i32 = tmp
  return {'D0': D0}

def _VOP3POp_V_DOT2_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(S2.u32)
  tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16)
  tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16)
  D0.u32 = tmp
  return {'D0': D0}

def _VOP3POp_V_DOT4_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(S2.i32)
  tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8)
  tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8)
  tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8)
  tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8)
  D0.i32 = tmp
  return {'D0': D0}

def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(S2.u32)
  tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8)
  tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8)
  tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8)
  tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8)
  D0.u32 = tmp
  return {'D0': D0}

def _VOP3POp_V_DOT8_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(S2.i32)
  tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4)
  tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4)
  tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4)
  tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4)
  tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4)
  tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4)
  tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4)
  tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4)
  D0.i32 = tmp
  return {'D0': D0}

def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(S2.u32)
  tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4)
  tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4)
  tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4)
  tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4)
  tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4)
  tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4)
  tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4)
  tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4)
  D0.u32 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32)
  tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32)
  D0.b64 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32
  tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32
  D0.b64 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32
  tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32
  D0.b64 = tmp
  return {'D0': D0}

def _VOP3POp_V_PK_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp0 = Reg(0)
  tmp1 = Reg(0)
  # --- compiled pseudocode ---
  tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32]
  tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32]
  D0.u32[31 : 0] = tmp0.u32
  D0.u32[63 : 32] = tmp1.u32
  return {'D0': D0}

def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(F(S0[15 : 0].bf16) * F(S1[15 : 0].bf16))
  tmp += F(S0[31 : 16].bf16) * F(S1[31 : 16].bf16)
  tmp += S2.f32
  D0.f32 = tmp
  return {'D0': D0}

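# v_minimum3_f16 / v_maximum3_f16 (three-operand minimum/maximum used below) are assumed to come
# from the pcode helper module at runtime; they are not part of the import list at the top of this file.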
def _VOP3POp_V_PK_MINIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 16].f16 = F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16))
  tmp[15 : 0].f16 = F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16))
  D0.b32 = tmp.b32
  return {'D0': D0}

def _VOP3POp_V_PK_MAXIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[31 : 16].f16 = F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16))
  tmp[15 : 0].f16 = F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16))
  D0.b32 = tmp.b32
  return {'D0': D0}

VOP3POp_FUNCTIONS = {
  VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16,
  VOP3POp.V_PK_MUL_LO_U16: _VOP3POp_V_PK_MUL_LO_U16,
  VOP3POp.V_PK_ADD_I16: _VOP3POp_V_PK_ADD_I16,
  VOP3POp.V_PK_SUB_I16: _VOP3POp_V_PK_SUB_I16,
  VOP3POp.V_PK_LSHLREV_B16: _VOP3POp_V_PK_LSHLREV_B16,
  VOP3POp.V_PK_LSHRREV_B16: _VOP3POp_V_PK_LSHRREV_B16,
  VOP3POp.V_PK_ASHRREV_I16: _VOP3POp_V_PK_ASHRREV_I16,
  VOP3POp.V_PK_MAX_I16: _VOP3POp_V_PK_MAX_I16,
  VOP3POp.V_PK_MIN_I16: _VOP3POp_V_PK_MIN_I16,
  VOP3POp.V_PK_MAD_U16: _VOP3POp_V_PK_MAD_U16,
  VOP3POp.V_PK_ADD_U16: _VOP3POp_V_PK_ADD_U16,
  VOP3POp.V_PK_SUB_U16: _VOP3POp_V_PK_SUB_U16,
  VOP3POp.V_PK_MAX_U16: _VOP3POp_V_PK_MAX_U16,
  VOP3POp.V_PK_MIN_U16: _VOP3POp_V_PK_MIN_U16,
  VOP3POp.V_PK_FMA_F16: _VOP3POp_V_PK_FMA_F16,
  VOP3POp.V_PK_ADD_F16: _VOP3POp_V_PK_ADD_F16,
  VOP3POp.V_PK_MUL_F16: _VOP3POp_V_PK_MUL_F16,
  VOP3POp.V_PK_MIN_F16: _VOP3POp_V_PK_MIN_F16,
  VOP3POp.V_PK_MAX_F16: _VOP3POp_V_PK_MAX_F16,
  VOP3POp.V_DOT2_F32_F16: _VOP3POp_V_DOT2_F32_F16,
  VOP3POp.V_DOT2_I32_I16: _VOP3POp_V_DOT2_I32_I16,
  VOP3POp.V_DOT2_U32_U16: _VOP3POp_V_DOT2_U32_U16,
  VOP3POp.V_DOT4_I32_I8: _VOP3POp_V_DOT4_I32_I8,
  VOP3POp.V_DOT4_U32_U8: _VOP3POp_V_DOT4_U32_U8,
  VOP3POp.V_DOT8_I32_I4: _VOP3POp_V_DOT8_I32_I4,
  VOP3POp.V_DOT8_U32_U4: _VOP3POp_V_DOT8_U32_U4,
  VOP3POp.V_PK_FMA_F32: _VOP3POp_V_PK_FMA_F32,
  VOP3POp.V_PK_MUL_F32: _VOP3POp_V_PK_MUL_F32,
  VOP3POp.V_PK_ADD_F32: _VOP3POp_V_PK_ADD_F32,
  VOP3POp.V_PK_MOV_B32: _VOP3POp_V_PK_MOV_B32,
  VOP3POp.V_DOT2_F32_BF16: _VOP3POp_V_DOT2_F32_BF16,
  VOP3POp.V_PK_MINIMUM3_F16: _VOP3POp_V_PK_MINIMUM3_F16,
  VOP3POp.V_PK_MAXIMUM3_F16: _VOP3POp_V_PK_MAXIMUM3_F16,
}

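# V_CMP_CLASS_* / V_CMPX_CLASS_* below test S0 against the class mask in S1 and produce one result
# bit per lane. The mask bit picked matches the class encoding used by this pseudocode:
# 0 = signaling NaN, 1 = quiet NaN, 2/9 = -/+ infinity, 3/8 = -/+ normal, 4/7 = -/+ denormal,
# 5/6 = -/+ zero (the negative-class index is chosen when the sign bit of S0 is set).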
def _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(F(S0.f32)):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(F(S0.f32)):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f32) == 255:
|
|
result = S1.u32[((2) if (sign(S0.f32)) else (9))]
|
|
elif exponent(S0.f32) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f32)) else (8))]
|
|
elif F(abs(S0.f32)) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f32)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f32)) else (6))]
|
|
D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(F(S0.f32)):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(F(S0.f32)):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f32) == 255:
|
|
result = S1.u32[((2) if (sign(S0.f32)) else (9))]
|
|
elif exponent(S0.f32) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f32)) else (8))]
|
|
elif F(abs(S0.f32)) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f32)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f32)) else (6))]
|
|
EXEC.u64[laneId] = D0.u64[laneId] = result
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(S0.f64):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(S0.f64):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f64) == 2047:
|
|
result = S1.u32[((2) if (sign(S0.f64)) else (9))]
|
|
elif exponent(S0.f64) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f64)) else (8))]
|
|
elif abs(S0.f64) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f64)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f64)) else (6))]
|
|
D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(S0.f64):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(S0.f64):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f64) == 2047:
|
|
result = S1.u32[((2) if (sign(S0.f64)) else (9))]
|
|
elif exponent(S0.f64) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f64)) else (8))]
|
|
elif abs(S0.f64) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f64)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f64)) else (6))]
|
|
EXEC.u64[laneId] = D0.u64[laneId] = result
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(F(S0.f16)):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(F(S0.f16)):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f16) == 31:
|
|
result = S1.u32[((2) if (sign(S0.f16)) else (9))]
|
|
elif exponent(S0.f16) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f16)) else (8))]
|
|
elif F(abs(S0.f16)) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f16)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f16)) else (6))]
|
|
D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(F(S0.f16)):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(F(S0.f16)):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f16) == 31:
|
|
result = S1.u32[((2) if (sign(S0.f16)) else (9))]
|
|
elif exponent(S0.f16) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f16)) else (8))]
|
|
elif F(abs(S0.f16)) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f16)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f16)) else (6))]
|
|
EXEC.u64[laneId] = D0.u64[laneId] = result
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 < S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 == S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 <= S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 > S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 != S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 >= S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 >= S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 != S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 > S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 <= S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 == S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 < S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 < S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 == S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 <= S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 > S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 != S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 >= S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 >= S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 != S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 > S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 <= S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 == S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 < S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 < S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 == S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 <= S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 > S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 != S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 >= S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64))
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64))
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 >= S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 != S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 > S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 <= S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 == S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 < S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64))
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64))
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64)
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMP_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 < S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 == S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 <= S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 > S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 != S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 >= S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 < S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 == S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 <= S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 > S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 != S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 >= S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 < S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 == S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 <= S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 > S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 != S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 >= S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 < S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 == S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 <= S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 > S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 != S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 >= S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 < S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 == S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 <= S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 > S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 != S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 >= S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 < S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 == S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 <= S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 > S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 != S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 >= S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOPCOp_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
def _VOPCOp_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0, 'EXEC': EXEC}
|
|
|
|
VOPCOp_FUNCTIONS = {
|
|
VOPCOp.V_CMP_CLASS_F32: _VOPCOp_V_CMP_CLASS_F32,
|
|
VOPCOp.V_CMPX_CLASS_F32: _VOPCOp_V_CMPX_CLASS_F32,
|
|
VOPCOp.V_CMP_CLASS_F64: _VOPCOp_V_CMP_CLASS_F64,
|
|
VOPCOp.V_CMPX_CLASS_F64: _VOPCOp_V_CMPX_CLASS_F64,
|
|
VOPCOp.V_CMP_CLASS_F16: _VOPCOp_V_CMP_CLASS_F16,
|
|
VOPCOp.V_CMPX_CLASS_F16: _VOPCOp_V_CMPX_CLASS_F16,
|
|
VOPCOp.V_CMP_F_F16: _VOPCOp_V_CMP_F_F16,
|
|
VOPCOp.V_CMP_LT_F16: _VOPCOp_V_CMP_LT_F16,
|
|
VOPCOp.V_CMP_EQ_F16: _VOPCOp_V_CMP_EQ_F16,
|
|
VOPCOp.V_CMP_LE_F16: _VOPCOp_V_CMP_LE_F16,
|
|
VOPCOp.V_CMP_GT_F16: _VOPCOp_V_CMP_GT_F16,
|
|
VOPCOp.V_CMP_LG_F16: _VOPCOp_V_CMP_LG_F16,
|
|
VOPCOp.V_CMP_GE_F16: _VOPCOp_V_CMP_GE_F16,
|
|
VOPCOp.V_CMP_O_F16: _VOPCOp_V_CMP_O_F16,
|
|
VOPCOp.V_CMP_U_F16: _VOPCOp_V_CMP_U_F16,
|
|
VOPCOp.V_CMP_NGE_F16: _VOPCOp_V_CMP_NGE_F16,
|
|
VOPCOp.V_CMP_NLG_F16: _VOPCOp_V_CMP_NLG_F16,
|
|
VOPCOp.V_CMP_NGT_F16: _VOPCOp_V_CMP_NGT_F16,
|
|
VOPCOp.V_CMP_NLE_F16: _VOPCOp_V_CMP_NLE_F16,
|
|
VOPCOp.V_CMP_NEQ_F16: _VOPCOp_V_CMP_NEQ_F16,
|
|
VOPCOp.V_CMP_NLT_F16: _VOPCOp_V_CMP_NLT_F16,
|
|
VOPCOp.V_CMP_TRU_F16: _VOPCOp_V_CMP_TRU_F16,
|
|
VOPCOp.V_CMPX_F_F16: _VOPCOp_V_CMPX_F_F16,
|
|
VOPCOp.V_CMPX_LT_F16: _VOPCOp_V_CMPX_LT_F16,
|
|
VOPCOp.V_CMPX_EQ_F16: _VOPCOp_V_CMPX_EQ_F16,
|
|
VOPCOp.V_CMPX_LE_F16: _VOPCOp_V_CMPX_LE_F16,
|
|
VOPCOp.V_CMPX_GT_F16: _VOPCOp_V_CMPX_GT_F16,
|
|
VOPCOp.V_CMPX_LG_F16: _VOPCOp_V_CMPX_LG_F16,
|
|
VOPCOp.V_CMPX_GE_F16: _VOPCOp_V_CMPX_GE_F16,
|
|
VOPCOp.V_CMPX_O_F16: _VOPCOp_V_CMPX_O_F16,
|
|
VOPCOp.V_CMPX_U_F16: _VOPCOp_V_CMPX_U_F16,
|
|
VOPCOp.V_CMPX_NGE_F16: _VOPCOp_V_CMPX_NGE_F16,
|
|
VOPCOp.V_CMPX_NLG_F16: _VOPCOp_V_CMPX_NLG_F16,
|
|
VOPCOp.V_CMPX_NGT_F16: _VOPCOp_V_CMPX_NGT_F16,
|
|
VOPCOp.V_CMPX_NLE_F16: _VOPCOp_V_CMPX_NLE_F16,
|
|
VOPCOp.V_CMPX_NEQ_F16: _VOPCOp_V_CMPX_NEQ_F16,
|
|
VOPCOp.V_CMPX_NLT_F16: _VOPCOp_V_CMPX_NLT_F16,
|
|
VOPCOp.V_CMPX_TRU_F16: _VOPCOp_V_CMPX_TRU_F16,
|
|
VOPCOp.V_CMP_F_F32: _VOPCOp_V_CMP_F_F32,
|
|
VOPCOp.V_CMP_LT_F32: _VOPCOp_V_CMP_LT_F32,
|
|
VOPCOp.V_CMP_EQ_F32: _VOPCOp_V_CMP_EQ_F32,
|
|
VOPCOp.V_CMP_LE_F32: _VOPCOp_V_CMP_LE_F32,
|
|
VOPCOp.V_CMP_GT_F32: _VOPCOp_V_CMP_GT_F32,
|
|
VOPCOp.V_CMP_LG_F32: _VOPCOp_V_CMP_LG_F32,
|
|
VOPCOp.V_CMP_GE_F32: _VOPCOp_V_CMP_GE_F32,
|
|
VOPCOp.V_CMP_O_F32: _VOPCOp_V_CMP_O_F32,
|
|
VOPCOp.V_CMP_U_F32: _VOPCOp_V_CMP_U_F32,
|
|
VOPCOp.V_CMP_NGE_F32: _VOPCOp_V_CMP_NGE_F32,
|
|
VOPCOp.V_CMP_NLG_F32: _VOPCOp_V_CMP_NLG_F32,
|
|
VOPCOp.V_CMP_NGT_F32: _VOPCOp_V_CMP_NGT_F32,
|
|
VOPCOp.V_CMP_NLE_F32: _VOPCOp_V_CMP_NLE_F32,
|
|
VOPCOp.V_CMP_NEQ_F32: _VOPCOp_V_CMP_NEQ_F32,
|
|
VOPCOp.V_CMP_NLT_F32: _VOPCOp_V_CMP_NLT_F32,
|
|
VOPCOp.V_CMP_TRU_F32: _VOPCOp_V_CMP_TRU_F32,
|
|
VOPCOp.V_CMPX_F_F32: _VOPCOp_V_CMPX_F_F32,
|
|
VOPCOp.V_CMPX_LT_F32: _VOPCOp_V_CMPX_LT_F32,
|
|
VOPCOp.V_CMPX_EQ_F32: _VOPCOp_V_CMPX_EQ_F32,
|
|
VOPCOp.V_CMPX_LE_F32: _VOPCOp_V_CMPX_LE_F32,
|
|
VOPCOp.V_CMPX_GT_F32: _VOPCOp_V_CMPX_GT_F32,
|
|
VOPCOp.V_CMPX_LG_F32: _VOPCOp_V_CMPX_LG_F32,
|
|
VOPCOp.V_CMPX_GE_F32: _VOPCOp_V_CMPX_GE_F32,
|
|
VOPCOp.V_CMPX_O_F32: _VOPCOp_V_CMPX_O_F32,
|
|
VOPCOp.V_CMPX_U_F32: _VOPCOp_V_CMPX_U_F32,
|
|
VOPCOp.V_CMPX_NGE_F32: _VOPCOp_V_CMPX_NGE_F32,
|
|
VOPCOp.V_CMPX_NLG_F32: _VOPCOp_V_CMPX_NLG_F32,
|
|
VOPCOp.V_CMPX_NGT_F32: _VOPCOp_V_CMPX_NGT_F32,
|
|
VOPCOp.V_CMPX_NLE_F32: _VOPCOp_V_CMPX_NLE_F32,
|
|
VOPCOp.V_CMPX_NEQ_F32: _VOPCOp_V_CMPX_NEQ_F32,
|
|
VOPCOp.V_CMPX_NLT_F32: _VOPCOp_V_CMPX_NLT_F32,
|
|
VOPCOp.V_CMPX_TRU_F32: _VOPCOp_V_CMPX_TRU_F32,
|
|
VOPCOp.V_CMP_F_F64: _VOPCOp_V_CMP_F_F64,
|
|
VOPCOp.V_CMP_LT_F64: _VOPCOp_V_CMP_LT_F64,
|
|
VOPCOp.V_CMP_EQ_F64: _VOPCOp_V_CMP_EQ_F64,
|
|
VOPCOp.V_CMP_LE_F64: _VOPCOp_V_CMP_LE_F64,
|
|
VOPCOp.V_CMP_GT_F64: _VOPCOp_V_CMP_GT_F64,
|
|
VOPCOp.V_CMP_LG_F64: _VOPCOp_V_CMP_LG_F64,
|
|
VOPCOp.V_CMP_GE_F64: _VOPCOp_V_CMP_GE_F64,
|
|
VOPCOp.V_CMP_O_F64: _VOPCOp_V_CMP_O_F64,
|
|
VOPCOp.V_CMP_U_F64: _VOPCOp_V_CMP_U_F64,
|
|
VOPCOp.V_CMP_NGE_F64: _VOPCOp_V_CMP_NGE_F64,
|
|
VOPCOp.V_CMP_NLG_F64: _VOPCOp_V_CMP_NLG_F64,
|
|
VOPCOp.V_CMP_NGT_F64: _VOPCOp_V_CMP_NGT_F64,
|
|
VOPCOp.V_CMP_NLE_F64: _VOPCOp_V_CMP_NLE_F64,
|
|
VOPCOp.V_CMP_NEQ_F64: _VOPCOp_V_CMP_NEQ_F64,
|
|
VOPCOp.V_CMP_NLT_F64: _VOPCOp_V_CMP_NLT_F64,
|
|
VOPCOp.V_CMP_TRU_F64: _VOPCOp_V_CMP_TRU_F64,
|
|
VOPCOp.V_CMPX_F_F64: _VOPCOp_V_CMPX_F_F64,
|
|
VOPCOp.V_CMPX_LT_F64: _VOPCOp_V_CMPX_LT_F64,
|
|
VOPCOp.V_CMPX_EQ_F64: _VOPCOp_V_CMPX_EQ_F64,
|
|
VOPCOp.V_CMPX_LE_F64: _VOPCOp_V_CMPX_LE_F64,
|
|
VOPCOp.V_CMPX_GT_F64: _VOPCOp_V_CMPX_GT_F64,
|
|
VOPCOp.V_CMPX_LG_F64: _VOPCOp_V_CMPX_LG_F64,
|
|
VOPCOp.V_CMPX_GE_F64: _VOPCOp_V_CMPX_GE_F64,
|
|
VOPCOp.V_CMPX_O_F64: _VOPCOp_V_CMPX_O_F64,
|
|
VOPCOp.V_CMPX_U_F64: _VOPCOp_V_CMPX_U_F64,
|
|
VOPCOp.V_CMPX_NGE_F64: _VOPCOp_V_CMPX_NGE_F64,
|
|
VOPCOp.V_CMPX_NLG_F64: _VOPCOp_V_CMPX_NLG_F64,
|
|
VOPCOp.V_CMPX_NGT_F64: _VOPCOp_V_CMPX_NGT_F64,
|
|
VOPCOp.V_CMPX_NLE_F64: _VOPCOp_V_CMPX_NLE_F64,
|
|
VOPCOp.V_CMPX_NEQ_F64: _VOPCOp_V_CMPX_NEQ_F64,
|
|
VOPCOp.V_CMPX_NLT_F64: _VOPCOp_V_CMPX_NLT_F64,
|
|
VOPCOp.V_CMPX_TRU_F64: _VOPCOp_V_CMPX_TRU_F64,
|
|
VOPCOp.V_CMP_F_I16: _VOPCOp_V_CMP_F_I16,
|
|
VOPCOp.V_CMP_LT_I16: _VOPCOp_V_CMP_LT_I16,
|
|
VOPCOp.V_CMP_EQ_I16: _VOPCOp_V_CMP_EQ_I16,
|
|
VOPCOp.V_CMP_LE_I16: _VOPCOp_V_CMP_LE_I16,
|
|
VOPCOp.V_CMP_GT_I16: _VOPCOp_V_CMP_GT_I16,
|
|
VOPCOp.V_CMP_NE_I16: _VOPCOp_V_CMP_NE_I16,
|
|
VOPCOp.V_CMP_GE_I16: _VOPCOp_V_CMP_GE_I16,
|
|
VOPCOp.V_CMP_T_I16: _VOPCOp_V_CMP_T_I16,
|
|
VOPCOp.V_CMP_F_U16: _VOPCOp_V_CMP_F_U16,
|
|
VOPCOp.V_CMP_LT_U16: _VOPCOp_V_CMP_LT_U16,
|
|
VOPCOp.V_CMP_EQ_U16: _VOPCOp_V_CMP_EQ_U16,
|
|
VOPCOp.V_CMP_LE_U16: _VOPCOp_V_CMP_LE_U16,
|
|
VOPCOp.V_CMP_GT_U16: _VOPCOp_V_CMP_GT_U16,
|
|
VOPCOp.V_CMP_NE_U16: _VOPCOp_V_CMP_NE_U16,
|
|
VOPCOp.V_CMP_GE_U16: _VOPCOp_V_CMP_GE_U16,
|
|
VOPCOp.V_CMP_T_U16: _VOPCOp_V_CMP_T_U16,
|
|
VOPCOp.V_CMPX_F_I16: _VOPCOp_V_CMPX_F_I16,
|
|
VOPCOp.V_CMPX_LT_I16: _VOPCOp_V_CMPX_LT_I16,
|
|
VOPCOp.V_CMPX_EQ_I16: _VOPCOp_V_CMPX_EQ_I16,
|
|
VOPCOp.V_CMPX_LE_I16: _VOPCOp_V_CMPX_LE_I16,
|
|
VOPCOp.V_CMPX_GT_I16: _VOPCOp_V_CMPX_GT_I16,
|
|
VOPCOp.V_CMPX_NE_I16: _VOPCOp_V_CMPX_NE_I16,
|
|
VOPCOp.V_CMPX_GE_I16: _VOPCOp_V_CMPX_GE_I16,
|
|
VOPCOp.V_CMPX_T_I16: _VOPCOp_V_CMPX_T_I16,
|
|
VOPCOp.V_CMPX_F_U16: _VOPCOp_V_CMPX_F_U16,
|
|
VOPCOp.V_CMPX_LT_U16: _VOPCOp_V_CMPX_LT_U16,
|
|
VOPCOp.V_CMPX_EQ_U16: _VOPCOp_V_CMPX_EQ_U16,
|
|
VOPCOp.V_CMPX_LE_U16: _VOPCOp_V_CMPX_LE_U16,
|
|
VOPCOp.V_CMPX_GT_U16: _VOPCOp_V_CMPX_GT_U16,
|
|
VOPCOp.V_CMPX_NE_U16: _VOPCOp_V_CMPX_NE_U16,
|
|
VOPCOp.V_CMPX_GE_U16: _VOPCOp_V_CMPX_GE_U16,
|
|
VOPCOp.V_CMPX_T_U16: _VOPCOp_V_CMPX_T_U16,
|
|
VOPCOp.V_CMP_F_I32: _VOPCOp_V_CMP_F_I32,
|
|
VOPCOp.V_CMP_LT_I32: _VOPCOp_V_CMP_LT_I32,
|
|
VOPCOp.V_CMP_EQ_I32: _VOPCOp_V_CMP_EQ_I32,
|
|
VOPCOp.V_CMP_LE_I32: _VOPCOp_V_CMP_LE_I32,
|
|
VOPCOp.V_CMP_GT_I32: _VOPCOp_V_CMP_GT_I32,
|
|
VOPCOp.V_CMP_NE_I32: _VOPCOp_V_CMP_NE_I32,
|
|
VOPCOp.V_CMP_GE_I32: _VOPCOp_V_CMP_GE_I32,
|
|
VOPCOp.V_CMP_T_I32: _VOPCOp_V_CMP_T_I32,
|
|
VOPCOp.V_CMP_F_U32: _VOPCOp_V_CMP_F_U32,
|
|
VOPCOp.V_CMP_LT_U32: _VOPCOp_V_CMP_LT_U32,
|
|
VOPCOp.V_CMP_EQ_U32: _VOPCOp_V_CMP_EQ_U32,
|
|
VOPCOp.V_CMP_LE_U32: _VOPCOp_V_CMP_LE_U32,
|
|
VOPCOp.V_CMP_GT_U32: _VOPCOp_V_CMP_GT_U32,
|
|
VOPCOp.V_CMP_NE_U32: _VOPCOp_V_CMP_NE_U32,
|
|
VOPCOp.V_CMP_GE_U32: _VOPCOp_V_CMP_GE_U32,
|
|
VOPCOp.V_CMP_T_U32: _VOPCOp_V_CMP_T_U32,
|
|
VOPCOp.V_CMPX_F_I32: _VOPCOp_V_CMPX_F_I32,
|
|
VOPCOp.V_CMPX_LT_I32: _VOPCOp_V_CMPX_LT_I32,
|
|
VOPCOp.V_CMPX_EQ_I32: _VOPCOp_V_CMPX_EQ_I32,
|
|
VOPCOp.V_CMPX_LE_I32: _VOPCOp_V_CMPX_LE_I32,
|
|
VOPCOp.V_CMPX_GT_I32: _VOPCOp_V_CMPX_GT_I32,
|
|
VOPCOp.V_CMPX_NE_I32: _VOPCOp_V_CMPX_NE_I32,
|
|
VOPCOp.V_CMPX_GE_I32: _VOPCOp_V_CMPX_GE_I32,
|
|
VOPCOp.V_CMPX_T_I32: _VOPCOp_V_CMPX_T_I32,
|
|
VOPCOp.V_CMPX_F_U32: _VOPCOp_V_CMPX_F_U32,
|
|
VOPCOp.V_CMPX_LT_U32: _VOPCOp_V_CMPX_LT_U32,
|
|
VOPCOp.V_CMPX_EQ_U32: _VOPCOp_V_CMPX_EQ_U32,
|
|
VOPCOp.V_CMPX_LE_U32: _VOPCOp_V_CMPX_LE_U32,
|
|
VOPCOp.V_CMPX_GT_U32: _VOPCOp_V_CMPX_GT_U32,
|
|
VOPCOp.V_CMPX_NE_U32: _VOPCOp_V_CMPX_NE_U32,
|
|
VOPCOp.V_CMPX_GE_U32: _VOPCOp_V_CMPX_GE_U32,
|
|
VOPCOp.V_CMPX_T_U32: _VOPCOp_V_CMPX_T_U32,
|
|
VOPCOp.V_CMP_F_I64: _VOPCOp_V_CMP_F_I64,
|
|
VOPCOp.V_CMP_LT_I64: _VOPCOp_V_CMP_LT_I64,
|
|
VOPCOp.V_CMP_EQ_I64: _VOPCOp_V_CMP_EQ_I64,
|
|
VOPCOp.V_CMP_LE_I64: _VOPCOp_V_CMP_LE_I64,
|
|
VOPCOp.V_CMP_GT_I64: _VOPCOp_V_CMP_GT_I64,
|
|
VOPCOp.V_CMP_NE_I64: _VOPCOp_V_CMP_NE_I64,
|
|
VOPCOp.V_CMP_GE_I64: _VOPCOp_V_CMP_GE_I64,
|
|
VOPCOp.V_CMP_T_I64: _VOPCOp_V_CMP_T_I64,
|
|
VOPCOp.V_CMP_F_U64: _VOPCOp_V_CMP_F_U64,
|
|
VOPCOp.V_CMP_LT_U64: _VOPCOp_V_CMP_LT_U64,
|
|
VOPCOp.V_CMP_EQ_U64: _VOPCOp_V_CMP_EQ_U64,
|
|
VOPCOp.V_CMP_LE_U64: _VOPCOp_V_CMP_LE_U64,
|
|
VOPCOp.V_CMP_GT_U64: _VOPCOp_V_CMP_GT_U64,
|
|
VOPCOp.V_CMP_NE_U64: _VOPCOp_V_CMP_NE_U64,
|
|
VOPCOp.V_CMP_GE_U64: _VOPCOp_V_CMP_GE_U64,
|
|
VOPCOp.V_CMP_T_U64: _VOPCOp_V_CMP_T_U64,
|
|
VOPCOp.V_CMPX_F_I64: _VOPCOp_V_CMPX_F_I64,
|
|
VOPCOp.V_CMPX_LT_I64: _VOPCOp_V_CMPX_LT_I64,
|
|
VOPCOp.V_CMPX_EQ_I64: _VOPCOp_V_CMPX_EQ_I64,
|
|
VOPCOp.V_CMPX_LE_I64: _VOPCOp_V_CMPX_LE_I64,
|
|
VOPCOp.V_CMPX_GT_I64: _VOPCOp_V_CMPX_GT_I64,
|
|
VOPCOp.V_CMPX_NE_I64: _VOPCOp_V_CMPX_NE_I64,
|
|
VOPCOp.V_CMPX_GE_I64: _VOPCOp_V_CMPX_GE_I64,
|
|
VOPCOp.V_CMPX_T_I64: _VOPCOp_V_CMPX_T_I64,
|
|
VOPCOp.V_CMPX_F_U64: _VOPCOp_V_CMPX_F_U64,
|
|
VOPCOp.V_CMPX_LT_U64: _VOPCOp_V_CMPX_LT_U64,
|
|
VOPCOp.V_CMPX_EQ_U64: _VOPCOp_V_CMPX_EQ_U64,
|
|
VOPCOp.V_CMPX_LE_U64: _VOPCOp_V_CMPX_LE_U64,
|
|
VOPCOp.V_CMPX_GT_U64: _VOPCOp_V_CMPX_GT_U64,
|
|
VOPCOp.V_CMPX_NE_U64: _VOPCOp_V_CMPX_NE_U64,
|
|
VOPCOp.V_CMPX_GE_U64: _VOPCOp_V_CMPX_GE_U64,
|
|
VOPCOp.V_CMPX_T_U64: _VOPCOp_V_CMPX_T_U64,
|
|
}
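# Usage sketch (illustrative only, not part of the autogenerated tables): a wavefront
# emulator can dispatch one of the comparisons above by indexing VOPCOp_FUNCTIONS with the
# opcode and letting the handler set one bit of the 64-bit result mask per lane; the
# V_CMPX_* handlers additionally write the EXEC mask. The Reg(...) wiring and argument
# choices below (S2, SCC, VCC, literal and VGPR are unused by these ops) are assumptions
# about the emulator's calling convention and may differ from the real dispatcher.
def _example_vopc_dispatch(op, src0, src1):
  D0, EXEC = Reg(0), Reg(0)
  fn = VOPCOp_FUNCTIONS[op]
  for lane, (a, b) in enumerate(zip(src0, src1)):
    fn(Reg(a), Reg(b), Reg(0), D0, Reg(0), Reg(0), lane, EXEC, 0, None)
  return D0  # per-lane comparison results packed into a 64-bit mask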
|
|
|
|
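# V_CMP_CLASS_* tests S0 against a class mask supplied in S1. Bit positions in the mask
# (matching the selections below): 0 = signaling NaN, 1 = quiet NaN, 2 = -inf, 3 = -normal,
# 4 = -denormal, 5 = -0, 6 = +0, 7 = +denormal, 8 = +normal, 9 = +inf. The per-lane result
# is the selected bit of S1, i.e. "true" if S0's class is enabled in the mask.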
def _VOP3AOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(F(S0.f32)):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(F(S0.f32)):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f32) == 255:
|
|
result = S1.u32[((2) if (sign(S0.f32)) else (9))]
|
|
elif exponent(S0.f32) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f32)) else (8))]
|
|
elif F(abs(S0.f32)) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f32)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f32)) else (6))]
|
|
D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(F(S0.f32)):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(F(S0.f32)):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f32) == 255:
|
|
result = S1.u32[((2) if (sign(S0.f32)) else (9))]
|
|
elif exponent(S0.f32) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f32)) else (8))]
|
|
elif F(abs(S0.f32)) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f32)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f32)) else (6))]
|
|
EXEC.u64[laneId] = D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(S0.f64):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(S0.f64):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f64) == 2047:
|
|
result = S1.u32[((2) if (sign(S0.f64)) else (9))]
|
|
elif exponent(S0.f64) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f64)) else (8))]
|
|
elif abs(S0.f64) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f64)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f64)) else (6))]
|
|
D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(S0.f64):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(S0.f64):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f64) == 2047:
|
|
result = S1.u32[((2) if (sign(S0.f64)) else (9))]
|
|
elif exponent(S0.f64) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f64)) else (8))]
|
|
elif abs(S0.f64) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f64)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f64)) else (6))]
|
|
EXEC.u64[laneId] = D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(F(S0.f16)):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(F(S0.f16)):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f16) == 31:
|
|
result = S1.u32[((2) if (sign(S0.f16)) else (9))]
|
|
elif exponent(S0.f16) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f16)) else (8))]
|
|
elif F(abs(S0.f16)) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f16)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f16)) else (6))]
|
|
D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if isSignalNAN(F(S0.f16)):
|
|
result = S1.u32[0]
|
|
elif isQuietNAN(F(S0.f16)):
|
|
result = S1.u32[1]
|
|
elif exponent(S0.f16) == 31:
|
|
result = S1.u32[((2) if (sign(S0.f16)) else (9))]
|
|
elif exponent(S0.f16) > 0:
|
|
result = S1.u32[((3) if (sign(S0.f16)) else (8))]
|
|
elif F(abs(S0.f16)) > 0.0:
|
|
result = S1.u32[((4) if (sign(S0.f16)) else (7))]
|
|
else:
|
|
result = S1.u32[((5) if (sign(S0.f16)) else (6))]
|
|
EXEC.u64[laneId] = D0.u64[laneId] = result
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 < S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 == S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 <= S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 > S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 != S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f16 >= S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 >= S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 != S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 > S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 <= S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 == S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f16 < S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 < S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 == S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 <= S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 > S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 != S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f32 >= S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 >= S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 != S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 > S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 <= S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 == S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f32 < S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 < S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 == S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 <= S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 > S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 != S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.f64 >= S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 >= S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 != S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 > S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 <= S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 == S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = not (S0.f64 < S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 < S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 == S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 <= S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 > S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 != S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i16 >= S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 < S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 == S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 <= S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 > S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 != S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u16 >= S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 < S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 == S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 <= S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 > S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 != S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i32 >= S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 < S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 == S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 <= S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 > S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 != S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u32 >= S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 < S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 == S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 <= S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 > S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 != S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.i64 >= S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 < S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 == S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 <= S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 > S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 != S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = S0.u64 >= S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = 0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
    EXEC.u64[laneId] = D0.u64[laneId] = 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.b32 = S0.b32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
if EXEC == 0x0:
|
|
lane = 0
|
|
else:
|
|
lane = s_ff1_i32_b64(EXEC)
|
|
D0.b32 = VGPR[lane][SRC0.u32]
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = f64_to_i32(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = i32_to_f64(S0.i32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = i32_to_f32(S0.i32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = f32_to_u32(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = f32_to_i32(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = f32_to_f16(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = f16_to_f32(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_RPI_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
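    # converts after adding 0.5 and flooring, so halfway cases round toward +infinity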
|
|
D0.i32 = f32_to_i32(floor(S0.f32 + 0.5))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_FLR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = f32_to_i32(floor(S0.f32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = f64_to_f32(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = f32_to_f64(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0[7 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0[15 : 8].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0[23 : 16].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = u32_to_f32(S0[31 : 24].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = f64_to_u32(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = u32_to_f64(S0.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = trunc(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = trunc(S0.f64)
|
|
if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)):
|
|
D0.f64 += 1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
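    # round-to-nearest-even: floor(S0 + 0.5), backing off by 1.0 when S0 is exactly halfway and floor(S0) is even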
|
|
D0.f64 = floor(S0.f64 + 0.5)
|
|
if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)):
|
|
D0.f64 -= 1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = trunc(S0.f64)
|
|
if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)):
|
|
D0.f64 += -1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 + -floor(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = trunc(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = trunc(S0.f32)
|
|
if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)):
|
|
D0.f32 += 1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = floor(S0.f32 + 0.5)
|
|
if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)):
|
|
D0.f32 -= 1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = trunc(S0.f32)
|
|
if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)):
|
|
D0.f32 += -1.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = pow(2.0, S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = log2(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = 1.0 / S0.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = 1.0 / S0.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = 1.0 / sqrt(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = 1.0 / S0.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = 1.0 / sqrt(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = sqrt(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = sqrt(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = sin(S0.f32 * F(PI * 2.0))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = cos(S0.f32 * F(PI * 2.0))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ~S0.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32[31 : 0] = S0.u32[0 : 31]
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FFBH_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
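    # find first high bit: result is the offset of the highest set bit counted from the MSB, or -1 when S0 is zero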
|
|
D0.i32 = -1
|
|
for i in range(0, int(31)+1):
|
|
if S0.u32[31 - i] == 1:
|
|
D0.i32 = i; break
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FFBL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = -1
|
|
for i in range(0, int(31)+1):
|
|
if S0.u32[i] == 1:
|
|
D0.i32 = i; break
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FFBH_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = -1
|
|
for i in range(1, int(31)+1):
|
|
if S0.i32[31 - i] != S0.i32[31]:
|
|
D0.i32 = i; break
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)):
|
|
D0.i32 = 0
|
|
else:
|
|
D0.i32 = exponent(S0.f64) - 1023 + 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)):
|
|
D0.f64 = S0.f64
|
|
else:
|
|
D0.f64 = mantissa(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = S0.f64 + -floor(S0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))):
|
|
D0.i32 = 0
|
|
else:
|
|
D0.i32 = exponent(S0.f32) - 127 + 1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))):
|
|
D0.f32 = S0.f32
|
|
else:
|
|
D0.f32 = mantissa(S0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.b64 = S0.b64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = u16_to_f16(S0.u16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = i16_to_f16(S0.i16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = f16_to_u16(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = f16_to_i16(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = 1.0 / S0.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = sqrt(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = 1.0 / sqrt(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = log2(S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = pow(2.0, S0.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
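    # per-lane select: returns S1 where this lane's VCC bit is set, otherwise S0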
|
|
D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 + S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 - S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S1.f32 - S0.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FMAC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = fma(S0.f64, S1.f64, D0.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 * S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (S0.i24) * (S1.i24)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (((S0.i24) * (S1.i24)) >> 32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u24) * (S1.u24)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (((S0.u24) * (S1.u24)) >> 32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
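    # minimum with NaN and signed-zero handling: signaling NaNs are quieted in IEEE mode, a single NaN input yields the other operand, and -0.0 is preferred over +0.0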
|
|
if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))):
|
|
D0.f32 = F(cvtToQuietNAN(F(S0.f32)))
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))):
|
|
D0.f32 = F(cvtToQuietNAN(F(S1.f32)))
|
|
elif isNAN(F(S0.f32)):
|
|
D0.f32 = S1.f32
|
|
elif isNAN(F(S1.f32)):
|
|
D0.f32 = S0.f32
|
|
elif ((F(S0.f32) == +0.0) and (F(S1.f32) == -0.0)):
|
|
D0.f32 = S1.f32
|
|
elif ((F(S0.f32) == -0.0) and (F(S1.f32) == +0.0)):
|
|
D0.f32 = S0.f32
|
|
else:
|
|
D0.f32 = ((S0.f32) if (S0.f32 < S1.f32) else (S1.f32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))):
|
|
D0.f32 = F(cvtToQuietNAN(F(S0.f32)))
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))):
|
|
D0.f32 = F(cvtToQuietNAN(F(S1.f32)))
|
|
elif isNAN(F(S0.f32)):
|
|
D0.f32 = S1.f32
|
|
elif isNAN(F(S1.f32)):
|
|
D0.f32 = S0.f32
|
|
elif ((F(S0.f32) == +0.0) and (F(S1.f32) == -0.0)):
|
|
D0.f32 = S0.f32
|
|
elif ((F(S0.f32) == -0.0) and (F(S1.f32) == +0.0)):
|
|
D0.f32 = S1.f32
|
|
elif WAVE_MODE.IEEE:
|
|
D0.f32 = ((S0.f32) if (S0.f32 >= S1.f32) else (S1.f32))
|
|
else:
|
|
D0.f32 = ((S0.f32) if (S0.f32 > S1.f32) else (S1.f32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S1.u32 >> S0[4 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (S1.i32 >> S0[4 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S1.u32 << S0[4 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 & S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 | S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 ^ S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 + S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 - S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S1.f16 - S0.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 * S1.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.f16 * S1.f16 + D0.f16)
|
|
if OPSEL.u4[3]:
|
|
D0 = Reg(_pack(tmp.f16, D0[15 : 0]))
|
|
else:
|
|
D0 = Reg(_pack(0, tmp.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S0.u16 + S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S0.u16 - S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUBREV_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S1.u16 - S0.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S0.u16 * S1.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = (S1.u16 << S0[3 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = (S1.u16 >> S0[3 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = (S1.i16 >> S0[3 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))):
|
|
D0.f16 = F(cvtToQuietNAN(F(S0.f16)))
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))):
|
|
D0.f16 = F(cvtToQuietNAN(F(S1.f16)))
|
|
elif isNAN(F(S0.f16)):
|
|
D0.f16 = S1.f16
|
|
elif isNAN(F(S1.f16)):
|
|
D0.f16 = S0.f16
|
|
elif ((F(S0.f16) == +0.0) and (F(S1.f16) == -0.0)):
|
|
D0.f16 = S0.f16
|
|
elif ((F(S0.f16) == -0.0) and (F(S1.f16) == +0.0)):
|
|
D0.f16 = S1.f16
|
|
elif WAVE_MODE.IEEE:
|
|
D0.f16 = ((S0.f16) if (S0.f16 >= S1.f16) else (S1.f16))
|
|
else:
|
|
D0.f16 = ((S0.f16) if (S0.f16 > S1.f16) else (S1.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))):
|
|
D0.f16 = F(cvtToQuietNAN(F(S0.f16)))
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))):
|
|
D0.f16 = F(cvtToQuietNAN(F(S1.f16)))
|
|
elif isNAN(F(S0.f16)):
|
|
D0.f16 = S1.f16
|
|
elif isNAN(F(S1.f16)):
|
|
D0.f16 = S0.f16
|
|
elif ((F(S0.f16) == +0.0) and (F(S1.f16) == -0.0)):
|
|
D0.f16 = S1.f16
|
|
elif ((F(S0.f16) == -0.0) and (F(S1.f16) == +0.0)):
|
|
D0.f16 = S0.f16
|
|
else:
|
|
D0.f16 = ((S0.f16) if (S0.f16 < S1.f16) else (S1.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 * F(2.0 ** (S1.i16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = S0.u32 + S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = S0.u32 - S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUBREV_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = S1.u32 - S0.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DOT2C_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
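    # packed dot product with accumulate: sums the two f16 lane products of S0 and S1 into the f32 destination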
|
|
tmp = Reg(D0.f32)
|
|
tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16)
|
|
tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16)
|
|
D0.f32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DOT2C_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.i32)
|
|
tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16)
|
|
tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16)
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DOT4C_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.i32)
|
|
tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8)
|
|
tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8)
|
|
tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8)
|
|
tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8)
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DOT8C_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.i32)
|
|
tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4)
|
|
tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4)
|
|
tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4)
|
|
tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4)
|
|
tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4)
|
|
tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4)
|
|
tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4)
|
|
tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4)
|
|
D0.i32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = fma(S0.f32, S1.f32, D0.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16)
|
|
D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ~(S0.u32 ^ S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (S0.i24) * (S1.i24) + S2.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u24) * (S1.u24) + S2.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CUBEID_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
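    # selects a cube-map face index 0.0-5.0 from whichever of the three inputs has the largest magnitude, plus its sign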
|
|
if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))):
|
|
if S2.f32 < 0.0:
|
|
D0.f32 = 5.0
|
|
else:
|
|
D0.f32 = 4.0
|
|
elif abs(S1.f32) >= abs(S0.f32):
|
|
if S1.f32 < 0.0:
|
|
D0.f32 = 3.0
|
|
else:
|
|
D0.f32 = 2.0
|
|
else:
|
|
if S0.f32 < 0.0:
|
|
D0.f32 = 1.0
|
|
else:
|
|
D0.f32 = 0.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CUBESC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))):
|
|
if S2.f32 < 0.0:
|
|
D0.f32 = -S0.f32
|
|
else:
|
|
D0.f32 = S0.f32
|
|
elif abs(S1.f32) >= abs(S0.f32):
|
|
D0.f32 = S0.f32
|
|
else:
|
|
if S0.f32 < 0.0:
|
|
D0.f32 = S2.f32
|
|
else:
|
|
D0.f32 = -S2.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CUBETC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))):
|
|
D0.f32 = -S1.f32
|
|
elif abs(S1.f32) >= abs(S0.f32):
|
|
if S1.f32 < 0.0:
|
|
D0.f32 = -S2.f32
|
|
else:
|
|
D0.f32 = S2.f32
|
|
else:
|
|
D0.f32 = -S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CUBEMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))):
|
|
D0.f32 = S2.f32 * 2.0
|
|
elif abs(S1.f32) >= abs(S0.f32):
|
|
D0.f32 = S1.f32 * 2.0
|
|
else:
|
|
D0.f32 = S0.f32 * 2.0
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
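    # unsigned bitfield extract: bit offset taken from S1[4:0], field width from S2[4:0]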
|
|
D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
# --- compiled pseudocode ---
|
|
tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1))
|
|
D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_BFI_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
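    # bitfield insert: S0 is the select mask, taking bits from S1 where set and from S2 where clear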
|
|
D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = fma(S0.f32, S1.f32, S2.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FMA_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = fma(S0.f64, S1.f64, S2.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LERP_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24))
|
|
tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16)
|
|
tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8)
|
|
tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1)
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ALIGNBIT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
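    # funnel shift: packs the two sources into one 64-bit value and extracts the 32-bit window starting at bit S2[4:0]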
|
|
D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0]) & 0xffffffff)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ALIGNBYTE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0] * 8)) & 0xffffffff)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MED3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or isNAN(F(S2.f32))):
|
|
D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32)
|
|
elif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32:
|
|
D0.f32 = v_max_f32(S1.f32, S2.f32)
|
|
elif v_max3_f32(S0.f32, S1.f32, S2.f32) == S1.f32:
|
|
D0.f32 = v_max_f32(S0.f32, S2.f32)
|
|
else:
|
|
D0.f32 = v_max_f32(S0.f32, S1.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MED3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32:
|
|
D0.i32 = v_max_i32(S1.i32, S2.i32)
|
|
elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32:
|
|
D0.i32 = v_max_i32(S0.i32, S2.i32)
|
|
else:
|
|
D0.i32 = v_max_i32(S0.i32, S1.i32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MED3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32:
|
|
D0.u32 = v_max_u32(S1.u32, S2.u32)
|
|
elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32:
|
|
D0.u32 = v_max_u32(S0.u32, S2.u32)
|
|
else:
|
|
D0.u32 = v_max_u32(S0.u32, S1.u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S2.u32)
|
|
tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0]))
|
|
tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8]))
|
|
tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16]))
|
|
tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24]))
|
|
D0.u32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SAD_HI_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ((v_sad_u8(S0, S1, 0)) << 16) + S2.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S2.u32)
|
|
tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16)
|
|
tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16)
|
|
D0.u32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_PK_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8)))))
|
|
tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8))))
|
|
D0.u32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DIV_FIXUP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
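    # fixes up a precomputed quotient for special cases: NaN inputs, 0/0, inf/inf, divide by zero, plus exponent overflow/underflow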
|
|
sign_out = (sign(S1.f32) ^ sign(S2.f32))
|
|
if isNAN(F(S2.f32)):
|
|
D0.f32 = F(cvtToQuietNAN(F(S2.f32)))
|
|
elif isNAN(F(S1.f32)):
|
|
D0.f32 = F(cvtToQuietNAN(F(S1.f32)))
|
|
elif ((F(S1.f32) == 0.0) and (F(S2.f32) == 0.0)):
|
|
D0.f32 = F(0xffc00000)
|
|
elif ((F(abs(S1.f32)) == INF) and (F(abs(S2.f32)) == INF)):
|
|
D0.f32 = F(0xffc00000)
|
|
elif ((F(S1.f32) == 0.0) or (F(abs(S2.f32)) == INF)):
|
|
D0.f32 = (((-INF).f32) if (sign_out) else (INF.f32))
|
|
elif ((F(abs(S1.f32)) == INF) or (F(S2.f32) == 0.0)):
|
|
D0.f32 = ((-0.0) if (sign_out) else (0.0))
|
|
elif exponent(S2.f32) - exponent(S1.f32) < -150:
|
|
D0.f32 = ((-UNDERFLOW_F32) if (sign_out) else (UNDERFLOW_F32))
|
|
elif exponent(S1.f32) == 255:
|
|
D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32))
|
|
else:
|
|
D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DIV_FIXUP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
sign_out = (sign(S1.f64) ^ sign(S2.f64))
|
|
if isNAN(S2.f64):
|
|
D0.f64 = cvtToQuietNAN(S2.f64)
|
|
elif isNAN(S1.f64):
|
|
D0.f64 = cvtToQuietNAN(S1.f64)
|
|
elif ((S1.f64 == 0.0) and (S2.f64 == 0.0)):
|
|
D0.f64 = F(0xfff8000000000000)
|
|
elif ((abs(S1.f64) == INF) and (abs(S2.f64) == INF)):
|
|
D0.f64 = F(0xfff8000000000000)
|
|
elif ((S1.f64 == 0.0) or (abs(S2.f64) == INF)):
|
|
D0.f64 = (((-INF)) if (sign_out) else (INF))
|
|
elif ((abs(S1.f64) == INF) or (S2.f64 == 0.0)):
|
|
D0.f64 = ((-0.0) if (sign_out) else (0.0))
|
|
elif exponent(S2.f64) - exponent(S1.f64) < -1075:
|
|
D0.f64 = ((-UNDERFLOW_F64) if (sign_out) else (UNDERFLOW_F64))
|
|
elif exponent(S1.f64) == 2047:
|
|
D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64))
|
|
else:
|
|
D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DIV_FMAS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
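    # fma whose result is scaled by 2**64 (or 2**-64 when exponent(S2) <= 127) for lanes where VCC is set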
|
|
if VCC.u64[laneId]:
|
|
D0.f32 = (2.0 ** 64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32)
|
|
else:
|
|
D0.f32 = fma(S0.f32, S1.f32, S2.f32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DIV_FMAS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if VCC.u64[laneId]:
|
|
D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64)
|
|
else:
|
|
D0.f64 = fma(S0.f64, S1.f64, S2.f64)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MSAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
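    # masked byte SAD: bytes of S1 equal to zero are skipped, the rest add |S0.byte - S1.byte| onto the S2 accumulator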
|
|
tmp = Reg(S2.u32)
|
|
tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0]))))
|
|
tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8]))))
|
|
tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16]))))
|
|
tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24]))))
|
|
D0.u32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_QSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
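    # quad SAD: four byte-wise SADs of S1 against 4-byte windows of S0 at byte offsets 0..3, each packed into a 16-bit field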
|
|
tmp = Reg(0)
|
|
# --- compiled pseudocode ---
|
|
tmp[63 : 48] = (v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32))
|
|
tmp[47 : 32] = (v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32))
|
|
tmp[31 : 16] = (v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32))
|
|
tmp[15 : 0] = (v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32))
|
|
D0.b64 = tmp.b64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
# --- compiled pseudocode ---
|
|
tmp[63 : 48] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32))
|
|
tmp[47 : 32] = (v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32))
|
|
tmp[31 : 16] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32))
|
|
tmp[15 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32))
|
|
D0.b64 = tmp.b64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MQSAD_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
# --- compiled pseudocode ---
|
|
tmp[127 : 96] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32))
|
|
tmp[95 : 64] = (v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32))
|
|
tmp[63 : 32] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32))
|
|
tmp[31 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32))
|
|
D0.b128 = tmp.b128
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.f16 * S1.f16 + S2.f16)
|
|
if OPSEL.u4[3]:
|
|
D0 = Reg(_pack(tmp.f16, D0[15 : 0]))
|
|
else:
|
|
D0 = Reg(_pack(0, tmp.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_LEGACY_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.u16 * S1.u16 + S2.u16)
|
|
if OPSEL.u4[3]:
|
|
D0 = Reg(_pack(tmp.u16, D0[15 : 0]))
|
|
else:
|
|
D0 = Reg(_pack(0, tmp.u16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_LEGACY_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.i16 * S1.i16 + S2.i16)
|
|
if OPSEL.u4[3]:
|
|
D0 = Reg(_pack(tmp.i16, D0[15 : 0]))
|
|
else:
|
|
D0 = Reg(_pack(0, tmp.i16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_PERM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
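    # byte permute: each byte of S2 is a selector into the 64-bit value packed from S0 and S1 via BYTE_PERMUTE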
|
|
D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24])
|
|
D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16])
|
|
D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8])
|
|
D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0])
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FMA_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(fma(S0.f16, S1.f16, S2.f16))
|
|
if OPSEL.u4[3]:
|
|
D0 = Reg(_pack(tmp.f16, D0[15 : 0]))
|
|
else:
|
|
D0 = Reg(_pack(0, tmp.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
sign_out = (sign(S1.f16) ^ sign(S2.f16))
|
|
if isNAN(F(S2.f16)):
|
|
tmp = Reg(cvtToQuietNAN(F(S2.f16)))
|
|
elif isNAN(F(S1.f16)):
|
|
tmp = Reg(cvtToQuietNAN(F(S1.f16)))
|
|
elif ((F(S1.f16) == 0.0) and (F(S2.f16) == 0.0)):
|
|
tmp = Reg(F(0xfe00))
|
|
elif ((F(abs(S1.f16)) == INF) and (F(abs(S2.f16)) == INF)):
|
|
tmp = Reg(F(0xfe00))
|
|
elif ((F(S1.f16) == 0.0) or (F(abs(S2.f16)) == INF)):
|
|
tmp = Reg((((-INF)) if (sign_out) else (INF)))
|
|
elif ((F(abs(S1.f16)) == INF) or (F(S2.f16) == 0.0)):
|
|
tmp = Reg(((-0.0) if (sign_out) else (0.0)))
|
|
else:
|
|
tmp = Reg(((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))))
|
|
if OPSEL.u4[3]:
|
|
D0 = Reg(_pack(tmp.f16, D0[15 : 0]))
|
|
else:
|
|
D0 = Reg(_pack(0, tmp.f16))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_PKACCUM_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
byte = S1.u32[1 : 0]
|
|
bit = byte.u32 * 8
|
|
D0.u32[bit + 7 : bit] = (f32_to_u8(S0.f32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u16) * (S1.u16) + S2.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = (S0.i16) * (S1.i16) + S2.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_XAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = (S0.u32 ^ S1.u32) + S2.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MED3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))):
|
|
D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16)
|
|
elif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16:
|
|
D0.f16 = v_max_f16(S1.f16, S2.f16)
|
|
elif v_max3_f16(S0.f16, S1.f16, S2.f16) == S1.f16:
|
|
D0.f16 = v_max_f16(S0.f16, S2.f16)
|
|
else:
|
|
D0.f16 = v_max_f16(S0.f16, S1.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MED3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16:
|
|
D0.i16 = v_max_i16(S1.i16, S2.i16)
|
|
elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16:
|
|
D0.i16 = v_max_i16(S0.i16, S2.i16)
|
|
else:
|
|
D0.i16 = v_max_i16(S0.i16, S1.i16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MED3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16:
|
|
D0.u16 = v_max_u16(S1.u16, S2.u16)
|
|
elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16:
|
|
D0.u16 = v_max_u16(S0.u16, S2.u16)
|
|
else:
|
|
D0.u16 = v_max_u16(S0.u16, S1.u16)
|
|
return {'D0': D0}
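
# Illustrative sketch (not part of the autogenerated table): the MED3 ops above select
# the median by finding which operand is the three-way maximum and then taking the
# maximum of the other two. Spot-check of that identity with plain ints:
def _example_med3(a, b, c):
  if max(a, b, c) == a: return max(b, c)
  if max(a, b, c) == b: return max(a, c)
  return max(a, b)
# e.g. _example_med3(7, 2, 5) == 5 and _example_med3(2, 2, 9) == 2, matching sorted()[1].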
|
|
|
|
def _VOP3AOp_V_LSHL_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32
  return {'D0': D0}

def _VOP3AOp_V_ADD_LSHL_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32)
  return {'D0': D0}

def _VOP3AOp_V_ADD3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u32 = S0.u32 + S1.u32 + S2.u32
  return {'D0': D0}

def _VOP3AOp_V_LSHL_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32)
  return {'D0': D0}

def _VOP3AOp_V_AND_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u32 = ((S0.u32 & S1.u32) | S2.u32)
  return {'D0': D0}

def _VOP3AOp_V_OR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u32 = (S0.u32 | S1.u32 | S2.u32)
  return {'D0': D0}
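
# Illustrative sketch (not part of the autogenerated table): the fused ALU ops above
# (V_LSHL_ADD_U32 ... V_OR3_B32) are plain 32-bit combinations of two primitive
# operations; only the low 5 bits of a shift amount are used and results wrap to 32 bits.
_M32 = 0xffffffff
def _example_lshl_add_u32(a, b, c): return ((a << (b & 31)) + c) & _M32
def _example_add_lshl_u32(a, b, c): return (((a + b) & _M32) << (c & 31)) & _M32
def _example_add3_u32(a, b, c): return (a + b + c) & _M32
def _example_lshl_or_b32(a, b, c): return ((a << (b & 31)) | c) & _M32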
|
|
|
|
def _VOP3AOp_V_MAD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = S0.f16 * S1.f16 + S2.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u16 = S0.u16 * S1.u16 + S2.u16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = S0.i16 * S1.i16 + S2.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f16 = fma(S0.f16, S1.f16, S2.f16)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DIV_FIXUP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
sign_out = (sign(S1.f16) ^ sign(S2.f16))
|
|
if isNAN(F(S2.f16)):
|
|
D0.f16 = F(cvtToQuietNAN(F(S2.f16)))
|
|
elif isNAN(F(S1.f16)):
|
|
D0.f16 = F(cvtToQuietNAN(F(S1.f16)))
|
|
elif ((F(S1.f16) == 0.0) and (F(S2.f16) == 0.0)):
|
|
D0.f16 = F(0xfe00)
|
|
elif ((F(abs(S1.f16)) == INF) and (F(abs(S2.f16)) == INF)):
|
|
D0.f16 = F(0xfe00)
|
|
elif ((F(S1.f16) == 0.0) or (F(abs(S2.f16)) == INF)):
|
|
D0.f16 = (((-INF).f16) if (sign_out) else (INF.f16))
|
|
elif ((F(abs(S1.f16)) == INF) or (F(S2.f16) == 0.0)):
|
|
D0.f16 = ((-0.0) if (sign_out) else (0.0))
|
|
else:
|
|
D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16)))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LSHL_ADD_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = S0.f64 + S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = S0.f64 * S1.f64
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MIN_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(S0.f64)):
|
|
D0.f64 = cvtToQuietNAN(S0.f64)
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(S1.f64)):
|
|
D0.f64 = cvtToQuietNAN(S1.f64)
|
|
elif isNAN(S0.f64):
|
|
D0.f64 = S1.f64
|
|
elif isNAN(S1.f64):
|
|
D0.f64 = S0.f64
|
|
elif ((S0.f64 == +0.0) and (S1.f64 == -0.0)):
|
|
D0.f64 = S1.f64
|
|
elif ((S0.f64 == -0.0) and (S1.f64 == +0.0)):
|
|
D0.f64 = S0.f64
|
|
else:
|
|
D0.f64 = ((S0.f64) if (S0.f64 < S1.f64) else (S1.f64))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAX_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if (WAVE_MODE.IEEE and isSignalNAN(S0.f64)):
|
|
D0.f64 = cvtToQuietNAN(S0.f64)
|
|
elif (WAVE_MODE.IEEE and isSignalNAN(S1.f64)):
|
|
D0.f64 = cvtToQuietNAN(S1.f64)
|
|
elif isNAN(S0.f64):
|
|
D0.f64 = S1.f64
|
|
elif isNAN(S1.f64):
|
|
D0.f64 = S0.f64
|
|
elif ((S0.f64 == +0.0) and (S1.f64 == -0.0)):
|
|
D0.f64 = S0.f64
|
|
elif ((S0.f64 == -0.0) and (S1.f64 == +0.0)):
|
|
D0.f64 = S1.f64
|
|
elif WAVE_MODE.IEEE:
|
|
D0.f64 = ((S0.f64) if (S0.f64 >= S1.f64) else (S1.f64))
|
|
else:
|
|
D0.f64 = ((S0.f64) if (S0.f64 > S1.f64) else (S1.f64))
|
|
return {'D0': D0}
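
# Illustrative sketch (not part of the autogenerated table): V_MIN_F64 above prefers the
# non-NaN operand, treats -0.0 as smaller than +0.0, and only falls through to an
# ordinary '<' compare once both inputs are ordinary numbers. A plain-float version,
# ignoring the IEEE-mode signalling-NaN quieting step:
def _example_min_f64(a, b):
  import math
  if math.isnan(a): return b
  if math.isnan(b): return a
  if a == 0.0 and b == 0.0:  # pick the negative zero
    return b if math.copysign(1.0, b) < 0 else a
  return a if a < b else b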
|
|
|
|
def _VOP3AOp_V_LDEXP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f64 = S0.f64 * 2.0 ** S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_LO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u32 = S0.u32 * S1.u32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u32 = (((S0.u32) * (S1.u32)) >> 32)
  return {'D0': D0}

def _VOP3AOp_V_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.i32 = (((S0.i32) * (S1.i32)) >> 32)
  return {'D0': D0}
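
# Illustrative sketch (not part of the autogenerated table): V_MUL_HI_U32 / V_MUL_HI_I32
# above return the upper 32 bits of the full 64-bit product; with Python's
# arbitrary-precision ints that is a single shift, the signed form relying on the
# arithmetic behaviour of '>>' for negative values.
def _example_mul_hi_u32(a, b): return ((a * b) >> 32) & 0xffffffff
def _example_mul_hi_i32(a, b): return ((a * b) >> 32) & 0xffffffff  # a, b given as sign-extended Python ints
# e.g. _example_mul_hi_u32(0xffffffff, 0xffffffff) == 0xfffffffe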
|
|
|
|
def _VOP3AOp_V_LDEXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = S0.f32 * 2.0 ** S1.i32
|
|
return {'D0': D0}
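
# Illustrative sketch (not part of the autogenerated table): the LDEXP ops above scale a
# float by a power of two; math.ldexp expresses the same computation without forming
# 2.0 ** n as an intermediate value.
def _example_ldexp(x, n):
  import math
  return math.ldexp(x, n)
# e.g. _example_ldexp(1.5, 4) == 24.0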
|
|
|
|
def _VOP3AOp_V_READLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
lane = S1.u32[5 : 0]
|
|
D0.b32 = VGPR[lane][SRC0.u32]
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_BCNT_U32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(S1.u32)
  for i in range(0, int(31)+1):
    tmp += S0[i].u32
  D0.u32 = tmp
  return {'D0': D0}
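
# Illustrative sketch (not part of the autogenerated table): V_BCNT_U32_B32 above adds
# the population count of S0 to S1, which in plain Python is bin(x).count("1")
# (or int.bit_count on Python 3.10+).
def _example_bcnt_u32_b32(s0, s1): return (s1 + bin(s0 & 0xffffffff).count("1")) & 0xffffffff
# e.g. _example_bcnt_u32_b32(0b1011, 5) == 8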
|
|
|
|
def _VOP3AOp_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S1.u64 << S0[5 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_LSHRREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.u64 = (S1.u64 >> S0[5 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ASHRREV_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i64 = (S1.i64 >> S0[5 : 0].u32)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_TRIG_PREOP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
shift = (S1[4 : 0].u32) * 53
|
|
if exponent(S0.f64) > 1077:
|
|
shift += exponent(S0.f64) - 1077
|
|
result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff)
|
|
scale = -53 - shift
|
|
if exponent(S0.f64) >= 1968:
|
|
scale += 128
|
|
D0.f64 = ldexp(result, scale)
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32)
  return {'D0': D0}
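
# Illustrative sketch (not part of the autogenerated table): V_BFM_B32 above builds a
# bitfield mask of S0[4:0] ones shifted left by S1[4:0] bits, i.e. ((1 << width) - 1) << offset.
def _example_bfm_b32(width, offset): return (((1 << (width & 31)) - 1) << (offset & 31)) & 0xffffffff
# e.g. _example_bfm_b32(4, 8) == 0x00000f00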
|
|
|
|
def _VOP3AOp_V_CVT_PKNORM_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].i16 = f32_to_snorm(S0.f32)
  tmp[31 : 16].i16 = f32_to_snorm(S1.f32)
  D0.u32 = tmp.u32  # assumed final store: the extracted pseudocode dropped the write of tmp into D0
  return {'D0': D0}

def _VOP3AOp_V_CVT_PKNORM_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].u16 = f32_to_unorm(S0.f32)
  tmp[31 : 16].u16 = f32_to_unorm(S1.f32)
  D0.u32 = tmp.u32  # assumed final store, as above
  return {'D0': D0}

def _VOP3AOp_V_CVT_PKRTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  prev_mode = ROUND_MODE
  tmp[15 : 0].f16 = f32_to_f16(S0.f32)
  tmp[31 : 16].f16 = f32_to_f16(S1.f32)
  D0.u32 = tmp.u32  # assumed final store, as above
  return {'D0': D0}

def _VOP3AOp_V_CVT_PK_U16_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].u16 = u32_to_u16(S0.u32)
  tmp[31 : 16].u16 = u32_to_u16(S1.u32)
  D0.u32 = tmp.u32  # assumed final store, as above
  return {'D0': D0}

def _VOP3AOp_V_CVT_PK_I16_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].i16 = i32_to_i16(S0.i32)
  tmp[31 : 16].i16 = i32_to_i16(S1.i32)
  D0.u32 = tmp.u32  # assumed final store, as above
  return {'D0': D0}

def _VOP3AOp_V_CVT_PKNORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].i16 = f16_to_snorm(S0.f16)
  tmp[31 : 16].i16 = f16_to_snorm(S1.f16)
  D0.u32 = tmp.u32  # assumed final store, as above
  return {'D0': D0}

def _VOP3AOp_V_CVT_PKNORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  tmp[15 : 0].u16 = f16_to_unorm(S0.f16)
  tmp[31 : 16].u16 = f16_to_unorm(S1.f16)
  D0.u32 = tmp.u32  # assumed final store, as above
  return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ADD_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = S0.i32 + S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUB_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i32 = S0.i32 - S1.i32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = S0.i16 + S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.i16 = S0.i16 - S1.i16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_PACK_B32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0[31 : 16].f16 = S1.f16
|
|
D0[15 : 0].f16 = S0.f16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MUL_LEGACY_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)):
|
|
D0.f32 = 0.0
|
|
else:
|
|
D0.f32 = S0.f32 * S1.f32
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_DOT2C_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(D0.f32)
|
|
tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16)
|
|
tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16)
|
|
D0.f32 = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcword = OPSEL[0].i32 * 16
|
|
src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16
|
|
D0[31 : 0].f32 = tmp0
|
|
D0[63 : 32].f32 = tmp1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcword = OPSEL[0].i32 * 16
|
|
src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16
|
|
D0[31 : 0].f32 = tmp0
|
|
D0[63 : 32].f32 = tmp1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcbyte = OPSEL[1 : 0].i32 * 8
|
|
src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8
|
|
tmp = Reg(fp8_to_f32_scale(src, scale.u8))
|
|
return {}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcbyte = OPSEL[1 : 0].i32 * 8
|
|
src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8
|
|
tmp = Reg(bf8_to_f32_scale(src, scale.u8))
|
|
return {}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcbyte = OPSEL[1 : 0].i32 * 8
|
|
src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8
|
|
D0[31 : 0].f32 = tmp0
|
|
D0[63 : 32].f32 = tmp1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcword = OPSEL[0].i32 * 16
|
|
src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16
|
|
D0[15 : 0].f16 = tmp0
|
|
D0[31 : 16].f16 = tmp1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcword = OPSEL[0].i32 * 16
|
|
src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16
|
|
D0[15 : 0].f16 = tmp0
|
|
D0[31 : 16].f16 = tmp1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcbyte = OPSEL[1 : 0].i32 * 8
|
|
src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8
|
|
tmp = Reg(fp8_to_f16_scale(src, scale.u8))
|
|
return {}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcbyte = OPSEL[1 : 0].i32 * 8
|
|
src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8
|
|
tmp = Reg(bf8_to_f16_scale(src, scale.u8))
|
|
return {}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcbyte = OPSEL[1 : 0].i32 * 8
|
|
src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8
|
|
D0[15 : 0].f16 = tmp0
|
|
D0[31 : 16].f16 = tmp1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcbyte = OPSEL[1 : 0].i32 * 8
|
|
src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8
|
|
D0[15 : 0].bf16 = tmp0
|
|
D0[31 : 16].bf16 = tmp1
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ASHR_PK_I8_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
# --- compiled pseudocode ---
|
|
tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32)
|
|
tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32)
|
|
D0[15 : 0] = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_ASHR_PK_U8_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
# --- compiled pseudocode ---
|
|
tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32)
|
|
tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32)
|
|
D0[15 : 0] = tmp
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_PK_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  prev_mode = ROUND_MODE
  tmp[15 : 0].f16 = f32_to_f16(S0.f32)
  tmp[31 : 16].f16 = f32_to_f16(S1.f32)
  D0.u32 = tmp.u32  # assumed final store: the extracted pseudocode dropped the write of tmp into D0
  return {'D0': D0}

def _VOP3AOp_V_CVT_PK_BF16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  tmp = Reg(0)
  # --- compiled pseudocode ---
  prev_mode = ROUND_MODE
  tmp[15 : 0].bf16 = f32_to_bf16(S0.f32)
  tmp[31 : 16].bf16 = f32_to_bf16(S1.f32)
  D0.u32 = tmp.u32  # assumed final store, as above
  return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcword = OPSEL[0].i32 * 16
|
|
src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16
|
|
D0[15 : 0].bf16 = tmp0.bf16
|
|
D0[31 : 16].bf16 = tmp1.bf16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(0)
|
|
SRC0 = Reg(src0_idx)
|
|
# --- compiled pseudocode ---
|
|
scale = (exponent(S1.f32))
|
|
srcword = OPSEL[0].i32 * 16
|
|
src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16
|
|
D0[15 : 0].bf16 = tmp0.bf16
|
|
D0[31 : 16].bf16 = tmp1.bf16
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MINIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32))
|
|
return {'D0': D0}
|
|
|
|
def _VOP3AOp_V_MAXIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0.f32 = F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32))
|
|
return {'D0': D0}
|
|
|
|
VOP3AOp_FUNCTIONS = {
|
|
VOP3AOp.V_CMP_CLASS_F32: _VOP3AOp_V_CMP_CLASS_F32,
|
|
VOP3AOp.V_CMPX_CLASS_F32: _VOP3AOp_V_CMPX_CLASS_F32,
|
|
VOP3AOp.V_CMP_CLASS_F64: _VOP3AOp_V_CMP_CLASS_F64,
|
|
VOP3AOp.V_CMPX_CLASS_F64: _VOP3AOp_V_CMPX_CLASS_F64,
|
|
VOP3AOp.V_CMP_CLASS_F16: _VOP3AOp_V_CMP_CLASS_F16,
|
|
VOP3AOp.V_CMPX_CLASS_F16: _VOP3AOp_V_CMPX_CLASS_F16,
|
|
VOP3AOp.V_CMP_F_F16: _VOP3AOp_V_CMP_F_F16,
|
|
VOP3AOp.V_CMP_LT_F16: _VOP3AOp_V_CMP_LT_F16,
|
|
VOP3AOp.V_CMP_EQ_F16: _VOP3AOp_V_CMP_EQ_F16,
|
|
VOP3AOp.V_CMP_LE_F16: _VOP3AOp_V_CMP_LE_F16,
|
|
VOP3AOp.V_CMP_GT_F16: _VOP3AOp_V_CMP_GT_F16,
|
|
VOP3AOp.V_CMP_LG_F16: _VOP3AOp_V_CMP_LG_F16,
|
|
VOP3AOp.V_CMP_GE_F16: _VOP3AOp_V_CMP_GE_F16,
|
|
VOP3AOp.V_CMP_O_F16: _VOP3AOp_V_CMP_O_F16,
|
|
VOP3AOp.V_CMP_U_F16: _VOP3AOp_V_CMP_U_F16,
|
|
VOP3AOp.V_CMP_NGE_F16: _VOP3AOp_V_CMP_NGE_F16,
|
|
VOP3AOp.V_CMP_NLG_F16: _VOP3AOp_V_CMP_NLG_F16,
|
|
VOP3AOp.V_CMP_NGT_F16: _VOP3AOp_V_CMP_NGT_F16,
|
|
VOP3AOp.V_CMP_NLE_F16: _VOP3AOp_V_CMP_NLE_F16,
|
|
VOP3AOp.V_CMP_NEQ_F16: _VOP3AOp_V_CMP_NEQ_F16,
|
|
VOP3AOp.V_CMP_NLT_F16: _VOP3AOp_V_CMP_NLT_F16,
|
|
VOP3AOp.V_CMP_TRU_F16: _VOP3AOp_V_CMP_TRU_F16,
|
|
VOP3AOp.V_CMPX_F_F16: _VOP3AOp_V_CMPX_F_F16,
|
|
VOP3AOp.V_CMPX_LT_F16: _VOP3AOp_V_CMPX_LT_F16,
|
|
VOP3AOp.V_CMPX_EQ_F16: _VOP3AOp_V_CMPX_EQ_F16,
|
|
VOP3AOp.V_CMPX_LE_F16: _VOP3AOp_V_CMPX_LE_F16,
|
|
VOP3AOp.V_CMPX_GT_F16: _VOP3AOp_V_CMPX_GT_F16,
|
|
VOP3AOp.V_CMPX_LG_F16: _VOP3AOp_V_CMPX_LG_F16,
|
|
VOP3AOp.V_CMPX_GE_F16: _VOP3AOp_V_CMPX_GE_F16,
|
|
VOP3AOp.V_CMPX_O_F16: _VOP3AOp_V_CMPX_O_F16,
|
|
VOP3AOp.V_CMPX_U_F16: _VOP3AOp_V_CMPX_U_F16,
|
|
VOP3AOp.V_CMPX_NGE_F16: _VOP3AOp_V_CMPX_NGE_F16,
|
|
VOP3AOp.V_CMPX_NLG_F16: _VOP3AOp_V_CMPX_NLG_F16,
|
|
VOP3AOp.V_CMPX_NGT_F16: _VOP3AOp_V_CMPX_NGT_F16,
|
|
VOP3AOp.V_CMPX_NLE_F16: _VOP3AOp_V_CMPX_NLE_F16,
|
|
VOP3AOp.V_CMPX_NEQ_F16: _VOP3AOp_V_CMPX_NEQ_F16,
|
|
VOP3AOp.V_CMPX_NLT_F16: _VOP3AOp_V_CMPX_NLT_F16,
|
|
VOP3AOp.V_CMPX_TRU_F16: _VOP3AOp_V_CMPX_TRU_F16,
|
|
VOP3AOp.V_CMP_F_F32: _VOP3AOp_V_CMP_F_F32,
|
|
VOP3AOp.V_CMP_LT_F32: _VOP3AOp_V_CMP_LT_F32,
|
|
VOP3AOp.V_CMP_EQ_F32: _VOP3AOp_V_CMP_EQ_F32,
|
|
VOP3AOp.V_CMP_LE_F32: _VOP3AOp_V_CMP_LE_F32,
|
|
VOP3AOp.V_CMP_GT_F32: _VOP3AOp_V_CMP_GT_F32,
|
|
VOP3AOp.V_CMP_LG_F32: _VOP3AOp_V_CMP_LG_F32,
|
|
VOP3AOp.V_CMP_GE_F32: _VOP3AOp_V_CMP_GE_F32,
|
|
VOP3AOp.V_CMP_O_F32: _VOP3AOp_V_CMP_O_F32,
|
|
VOP3AOp.V_CMP_U_F32: _VOP3AOp_V_CMP_U_F32,
|
|
VOP3AOp.V_CMP_NGE_F32: _VOP3AOp_V_CMP_NGE_F32,
|
|
VOP3AOp.V_CMP_NLG_F32: _VOP3AOp_V_CMP_NLG_F32,
|
|
VOP3AOp.V_CMP_NGT_F32: _VOP3AOp_V_CMP_NGT_F32,
|
|
VOP3AOp.V_CMP_NLE_F32: _VOP3AOp_V_CMP_NLE_F32,
|
|
VOP3AOp.V_CMP_NEQ_F32: _VOP3AOp_V_CMP_NEQ_F32,
|
|
VOP3AOp.V_CMP_NLT_F32: _VOP3AOp_V_CMP_NLT_F32,
|
|
VOP3AOp.V_CMP_TRU_F32: _VOP3AOp_V_CMP_TRU_F32,
|
|
VOP3AOp.V_CMPX_F_F32: _VOP3AOp_V_CMPX_F_F32,
|
|
VOP3AOp.V_CMPX_LT_F32: _VOP3AOp_V_CMPX_LT_F32,
|
|
VOP3AOp.V_CMPX_EQ_F32: _VOP3AOp_V_CMPX_EQ_F32,
|
|
VOP3AOp.V_CMPX_LE_F32: _VOP3AOp_V_CMPX_LE_F32,
|
|
VOP3AOp.V_CMPX_GT_F32: _VOP3AOp_V_CMPX_GT_F32,
|
|
VOP3AOp.V_CMPX_LG_F32: _VOP3AOp_V_CMPX_LG_F32,
|
|
VOP3AOp.V_CMPX_GE_F32: _VOP3AOp_V_CMPX_GE_F32,
|
|
VOP3AOp.V_CMPX_O_F32: _VOP3AOp_V_CMPX_O_F32,
|
|
VOP3AOp.V_CMPX_U_F32: _VOP3AOp_V_CMPX_U_F32,
|
|
VOP3AOp.V_CMPX_NGE_F32: _VOP3AOp_V_CMPX_NGE_F32,
|
|
VOP3AOp.V_CMPX_NLG_F32: _VOP3AOp_V_CMPX_NLG_F32,
|
|
VOP3AOp.V_CMPX_NGT_F32: _VOP3AOp_V_CMPX_NGT_F32,
|
|
VOP3AOp.V_CMPX_NLE_F32: _VOP3AOp_V_CMPX_NLE_F32,
|
|
VOP3AOp.V_CMPX_NEQ_F32: _VOP3AOp_V_CMPX_NEQ_F32,
|
|
VOP3AOp.V_CMPX_NLT_F32: _VOP3AOp_V_CMPX_NLT_F32,
|
|
VOP3AOp.V_CMPX_TRU_F32: _VOP3AOp_V_CMPX_TRU_F32,
|
|
VOP3AOp.V_CMP_F_F64: _VOP3AOp_V_CMP_F_F64,
|
|
VOP3AOp.V_CMP_LT_F64: _VOP3AOp_V_CMP_LT_F64,
|
|
VOP3AOp.V_CMP_EQ_F64: _VOP3AOp_V_CMP_EQ_F64,
|
|
VOP3AOp.V_CMP_LE_F64: _VOP3AOp_V_CMP_LE_F64,
|
|
VOP3AOp.V_CMP_GT_F64: _VOP3AOp_V_CMP_GT_F64,
|
|
VOP3AOp.V_CMP_LG_F64: _VOP3AOp_V_CMP_LG_F64,
|
|
VOP3AOp.V_CMP_GE_F64: _VOP3AOp_V_CMP_GE_F64,
|
|
VOP3AOp.V_CMP_O_F64: _VOP3AOp_V_CMP_O_F64,
|
|
VOP3AOp.V_CMP_U_F64: _VOP3AOp_V_CMP_U_F64,
|
|
VOP3AOp.V_CMP_NGE_F64: _VOP3AOp_V_CMP_NGE_F64,
|
|
VOP3AOp.V_CMP_NLG_F64: _VOP3AOp_V_CMP_NLG_F64,
|
|
VOP3AOp.V_CMP_NGT_F64: _VOP3AOp_V_CMP_NGT_F64,
|
|
VOP3AOp.V_CMP_NLE_F64: _VOP3AOp_V_CMP_NLE_F64,
|
|
VOP3AOp.V_CMP_NEQ_F64: _VOP3AOp_V_CMP_NEQ_F64,
|
|
VOP3AOp.V_CMP_NLT_F64: _VOP3AOp_V_CMP_NLT_F64,
|
|
VOP3AOp.V_CMP_TRU_F64: _VOP3AOp_V_CMP_TRU_F64,
|
|
VOP3AOp.V_CMPX_F_F64: _VOP3AOp_V_CMPX_F_F64,
|
|
VOP3AOp.V_CMPX_LT_F64: _VOP3AOp_V_CMPX_LT_F64,
|
|
VOP3AOp.V_CMPX_EQ_F64: _VOP3AOp_V_CMPX_EQ_F64,
|
|
VOP3AOp.V_CMPX_LE_F64: _VOP3AOp_V_CMPX_LE_F64,
|
|
VOP3AOp.V_CMPX_GT_F64: _VOP3AOp_V_CMPX_GT_F64,
|
|
VOP3AOp.V_CMPX_LG_F64: _VOP3AOp_V_CMPX_LG_F64,
|
|
VOP3AOp.V_CMPX_GE_F64: _VOP3AOp_V_CMPX_GE_F64,
|
|
VOP3AOp.V_CMPX_O_F64: _VOP3AOp_V_CMPX_O_F64,
|
|
VOP3AOp.V_CMPX_U_F64: _VOP3AOp_V_CMPX_U_F64,
|
|
VOP3AOp.V_CMPX_NGE_F64: _VOP3AOp_V_CMPX_NGE_F64,
|
|
VOP3AOp.V_CMPX_NLG_F64: _VOP3AOp_V_CMPX_NLG_F64,
|
|
VOP3AOp.V_CMPX_NGT_F64: _VOP3AOp_V_CMPX_NGT_F64,
|
|
VOP3AOp.V_CMPX_NLE_F64: _VOP3AOp_V_CMPX_NLE_F64,
|
|
VOP3AOp.V_CMPX_NEQ_F64: _VOP3AOp_V_CMPX_NEQ_F64,
|
|
VOP3AOp.V_CMPX_NLT_F64: _VOP3AOp_V_CMPX_NLT_F64,
|
|
VOP3AOp.V_CMPX_TRU_F64: _VOP3AOp_V_CMPX_TRU_F64,
|
|
VOP3AOp.V_CMP_F_I16: _VOP3AOp_V_CMP_F_I16,
|
|
VOP3AOp.V_CMP_LT_I16: _VOP3AOp_V_CMP_LT_I16,
|
|
VOP3AOp.V_CMP_EQ_I16: _VOP3AOp_V_CMP_EQ_I16,
|
|
VOP3AOp.V_CMP_LE_I16: _VOP3AOp_V_CMP_LE_I16,
|
|
VOP3AOp.V_CMP_GT_I16: _VOP3AOp_V_CMP_GT_I16,
|
|
VOP3AOp.V_CMP_NE_I16: _VOP3AOp_V_CMP_NE_I16,
|
|
VOP3AOp.V_CMP_GE_I16: _VOP3AOp_V_CMP_GE_I16,
|
|
VOP3AOp.V_CMP_T_I16: _VOP3AOp_V_CMP_T_I16,
|
|
VOP3AOp.V_CMP_F_U16: _VOP3AOp_V_CMP_F_U16,
|
|
VOP3AOp.V_CMP_LT_U16: _VOP3AOp_V_CMP_LT_U16,
|
|
VOP3AOp.V_CMP_EQ_U16: _VOP3AOp_V_CMP_EQ_U16,
|
|
VOP3AOp.V_CMP_LE_U16: _VOP3AOp_V_CMP_LE_U16,
|
|
VOP3AOp.V_CMP_GT_U16: _VOP3AOp_V_CMP_GT_U16,
|
|
VOP3AOp.V_CMP_NE_U16: _VOP3AOp_V_CMP_NE_U16,
|
|
VOP3AOp.V_CMP_GE_U16: _VOP3AOp_V_CMP_GE_U16,
|
|
VOP3AOp.V_CMP_T_U16: _VOP3AOp_V_CMP_T_U16,
|
|
VOP3AOp.V_CMPX_F_I16: _VOP3AOp_V_CMPX_F_I16,
|
|
VOP3AOp.V_CMPX_LT_I16: _VOP3AOp_V_CMPX_LT_I16,
|
|
VOP3AOp.V_CMPX_EQ_I16: _VOP3AOp_V_CMPX_EQ_I16,
|
|
VOP3AOp.V_CMPX_LE_I16: _VOP3AOp_V_CMPX_LE_I16,
|
|
VOP3AOp.V_CMPX_GT_I16: _VOP3AOp_V_CMPX_GT_I16,
|
|
VOP3AOp.V_CMPX_NE_I16: _VOP3AOp_V_CMPX_NE_I16,
|
|
VOP3AOp.V_CMPX_GE_I16: _VOP3AOp_V_CMPX_GE_I16,
|
|
VOP3AOp.V_CMPX_T_I16: _VOP3AOp_V_CMPX_T_I16,
|
|
VOP3AOp.V_CMPX_F_U16: _VOP3AOp_V_CMPX_F_U16,
|
|
VOP3AOp.V_CMPX_LT_U16: _VOP3AOp_V_CMPX_LT_U16,
|
|
VOP3AOp.V_CMPX_EQ_U16: _VOP3AOp_V_CMPX_EQ_U16,
|
|
VOP3AOp.V_CMPX_LE_U16: _VOP3AOp_V_CMPX_LE_U16,
|
|
VOP3AOp.V_CMPX_GT_U16: _VOP3AOp_V_CMPX_GT_U16,
|
|
VOP3AOp.V_CMPX_NE_U16: _VOP3AOp_V_CMPX_NE_U16,
|
|
VOP3AOp.V_CMPX_GE_U16: _VOP3AOp_V_CMPX_GE_U16,
|
|
VOP3AOp.V_CMPX_T_U16: _VOP3AOp_V_CMPX_T_U16,
|
|
VOP3AOp.V_CMP_F_I32: _VOP3AOp_V_CMP_F_I32,
|
|
VOP3AOp.V_CMP_LT_I32: _VOP3AOp_V_CMP_LT_I32,
|
|
VOP3AOp.V_CMP_EQ_I32: _VOP3AOp_V_CMP_EQ_I32,
|
|
VOP3AOp.V_CMP_LE_I32: _VOP3AOp_V_CMP_LE_I32,
|
|
VOP3AOp.V_CMP_GT_I32: _VOP3AOp_V_CMP_GT_I32,
|
|
VOP3AOp.V_CMP_NE_I32: _VOP3AOp_V_CMP_NE_I32,
|
|
VOP3AOp.V_CMP_GE_I32: _VOP3AOp_V_CMP_GE_I32,
|
|
VOP3AOp.V_CMP_T_I32: _VOP3AOp_V_CMP_T_I32,
|
|
VOP3AOp.V_CMP_F_U32: _VOP3AOp_V_CMP_F_U32,
|
|
VOP3AOp.V_CMP_LT_U32: _VOP3AOp_V_CMP_LT_U32,
|
|
VOP3AOp.V_CMP_EQ_U32: _VOP3AOp_V_CMP_EQ_U32,
|
|
VOP3AOp.V_CMP_LE_U32: _VOP3AOp_V_CMP_LE_U32,
|
|
VOP3AOp.V_CMP_GT_U32: _VOP3AOp_V_CMP_GT_U32,
|
|
VOP3AOp.V_CMP_NE_U32: _VOP3AOp_V_CMP_NE_U32,
|
|
VOP3AOp.V_CMP_GE_U32: _VOP3AOp_V_CMP_GE_U32,
|
|
VOP3AOp.V_CMP_T_U32: _VOP3AOp_V_CMP_T_U32,
|
|
VOP3AOp.V_CMPX_F_I32: _VOP3AOp_V_CMPX_F_I32,
|
|
VOP3AOp.V_CMPX_LT_I32: _VOP3AOp_V_CMPX_LT_I32,
|
|
VOP3AOp.V_CMPX_EQ_I32: _VOP3AOp_V_CMPX_EQ_I32,
|
|
VOP3AOp.V_CMPX_LE_I32: _VOP3AOp_V_CMPX_LE_I32,
|
|
VOP3AOp.V_CMPX_GT_I32: _VOP3AOp_V_CMPX_GT_I32,
|
|
VOP3AOp.V_CMPX_NE_I32: _VOP3AOp_V_CMPX_NE_I32,
|
|
VOP3AOp.V_CMPX_GE_I32: _VOP3AOp_V_CMPX_GE_I32,
|
|
VOP3AOp.V_CMPX_T_I32: _VOP3AOp_V_CMPX_T_I32,
|
|
VOP3AOp.V_CMPX_F_U32: _VOP3AOp_V_CMPX_F_U32,
|
|
VOP3AOp.V_CMPX_LT_U32: _VOP3AOp_V_CMPX_LT_U32,
|
|
VOP3AOp.V_CMPX_EQ_U32: _VOP3AOp_V_CMPX_EQ_U32,
|
|
VOP3AOp.V_CMPX_LE_U32: _VOP3AOp_V_CMPX_LE_U32,
|
|
VOP3AOp.V_CMPX_GT_U32: _VOP3AOp_V_CMPX_GT_U32,
|
|
VOP3AOp.V_CMPX_NE_U32: _VOP3AOp_V_CMPX_NE_U32,
|
|
VOP3AOp.V_CMPX_GE_U32: _VOP3AOp_V_CMPX_GE_U32,
|
|
VOP3AOp.V_CMPX_T_U32: _VOP3AOp_V_CMPX_T_U32,
|
|
VOP3AOp.V_CMP_F_I64: _VOP3AOp_V_CMP_F_I64,
|
|
VOP3AOp.V_CMP_LT_I64: _VOP3AOp_V_CMP_LT_I64,
|
|
VOP3AOp.V_CMP_EQ_I64: _VOP3AOp_V_CMP_EQ_I64,
|
|
VOP3AOp.V_CMP_LE_I64: _VOP3AOp_V_CMP_LE_I64,
|
|
VOP3AOp.V_CMP_GT_I64: _VOP3AOp_V_CMP_GT_I64,
|
|
VOP3AOp.V_CMP_NE_I64: _VOP3AOp_V_CMP_NE_I64,
|
|
VOP3AOp.V_CMP_GE_I64: _VOP3AOp_V_CMP_GE_I64,
|
|
VOP3AOp.V_CMP_T_I64: _VOP3AOp_V_CMP_T_I64,
|
|
VOP3AOp.V_CMP_F_U64: _VOP3AOp_V_CMP_F_U64,
|
|
VOP3AOp.V_CMP_LT_U64: _VOP3AOp_V_CMP_LT_U64,
|
|
VOP3AOp.V_CMP_EQ_U64: _VOP3AOp_V_CMP_EQ_U64,
|
|
VOP3AOp.V_CMP_LE_U64: _VOP3AOp_V_CMP_LE_U64,
|
|
VOP3AOp.V_CMP_GT_U64: _VOP3AOp_V_CMP_GT_U64,
|
|
VOP3AOp.V_CMP_NE_U64: _VOP3AOp_V_CMP_NE_U64,
|
|
VOP3AOp.V_CMP_GE_U64: _VOP3AOp_V_CMP_GE_U64,
|
|
VOP3AOp.V_CMP_T_U64: _VOP3AOp_V_CMP_T_U64,
|
|
VOP3AOp.V_CMPX_F_I64: _VOP3AOp_V_CMPX_F_I64,
|
|
VOP3AOp.V_CMPX_LT_I64: _VOP3AOp_V_CMPX_LT_I64,
|
|
VOP3AOp.V_CMPX_EQ_I64: _VOP3AOp_V_CMPX_EQ_I64,
|
|
VOP3AOp.V_CMPX_LE_I64: _VOP3AOp_V_CMPX_LE_I64,
|
|
VOP3AOp.V_CMPX_GT_I64: _VOP3AOp_V_CMPX_GT_I64,
|
|
VOP3AOp.V_CMPX_NE_I64: _VOP3AOp_V_CMPX_NE_I64,
|
|
VOP3AOp.V_CMPX_GE_I64: _VOP3AOp_V_CMPX_GE_I64,
|
|
VOP3AOp.V_CMPX_T_I64: _VOP3AOp_V_CMPX_T_I64,
|
|
VOP3AOp.V_CMPX_F_U64: _VOP3AOp_V_CMPX_F_U64,
|
|
VOP3AOp.V_CMPX_LT_U64: _VOP3AOp_V_CMPX_LT_U64,
|
|
VOP3AOp.V_CMPX_EQ_U64: _VOP3AOp_V_CMPX_EQ_U64,
|
|
VOP3AOp.V_CMPX_LE_U64: _VOP3AOp_V_CMPX_LE_U64,
|
|
VOP3AOp.V_CMPX_GT_U64: _VOP3AOp_V_CMPX_GT_U64,
|
|
VOP3AOp.V_CMPX_NE_U64: _VOP3AOp_V_CMPX_NE_U64,
|
|
VOP3AOp.V_CMPX_GE_U64: _VOP3AOp_V_CMPX_GE_U64,
|
|
VOP3AOp.V_CMPX_T_U64: _VOP3AOp_V_CMPX_T_U64,
|
|
VOP3AOp.V_MOV_B32: _VOP3AOp_V_MOV_B32,
|
|
VOP3AOp.V_READFIRSTLANE_B32: _VOP3AOp_V_READFIRSTLANE_B32,
|
|
VOP3AOp.V_CVT_I32_F64: _VOP3AOp_V_CVT_I32_F64,
|
|
VOP3AOp.V_CVT_F64_I32: _VOP3AOp_V_CVT_F64_I32,
|
|
VOP3AOp.V_CVT_F32_I32: _VOP3AOp_V_CVT_F32_I32,
|
|
VOP3AOp.V_CVT_F32_U32: _VOP3AOp_V_CVT_F32_U32,
|
|
VOP3AOp.V_CVT_U32_F32: _VOP3AOp_V_CVT_U32_F32,
|
|
VOP3AOp.V_CVT_I32_F32: _VOP3AOp_V_CVT_I32_F32,
|
|
VOP3AOp.V_CVT_F16_F32: _VOP3AOp_V_CVT_F16_F32,
|
|
VOP3AOp.V_CVT_F32_F16: _VOP3AOp_V_CVT_F32_F16,
|
|
VOP3AOp.V_CVT_RPI_I32_F32: _VOP3AOp_V_CVT_RPI_I32_F32,
|
|
VOP3AOp.V_CVT_FLR_I32_F32: _VOP3AOp_V_CVT_FLR_I32_F32,
|
|
VOP3AOp.V_CVT_F32_F64: _VOP3AOp_V_CVT_F32_F64,
|
|
VOP3AOp.V_CVT_F64_F32: _VOP3AOp_V_CVT_F64_F32,
|
|
VOP3AOp.V_CVT_F32_UBYTE0: _VOP3AOp_V_CVT_F32_UBYTE0,
|
|
VOP3AOp.V_CVT_F32_UBYTE1: _VOP3AOp_V_CVT_F32_UBYTE1,
|
|
VOP3AOp.V_CVT_F32_UBYTE2: _VOP3AOp_V_CVT_F32_UBYTE2,
|
|
VOP3AOp.V_CVT_F32_UBYTE3: _VOP3AOp_V_CVT_F32_UBYTE3,
|
|
VOP3AOp.V_CVT_U32_F64: _VOP3AOp_V_CVT_U32_F64,
|
|
VOP3AOp.V_CVT_F64_U32: _VOP3AOp_V_CVT_F64_U32,
|
|
VOP3AOp.V_TRUNC_F64: _VOP3AOp_V_TRUNC_F64,
|
|
VOP3AOp.V_CEIL_F64: _VOP3AOp_V_CEIL_F64,
|
|
VOP3AOp.V_RNDNE_F64: _VOP3AOp_V_RNDNE_F64,
|
|
VOP3AOp.V_FLOOR_F64: _VOP3AOp_V_FLOOR_F64,
|
|
VOP3AOp.V_FRACT_F32: _VOP3AOp_V_FRACT_F32,
|
|
VOP3AOp.V_TRUNC_F32: _VOP3AOp_V_TRUNC_F32,
|
|
VOP3AOp.V_CEIL_F32: _VOP3AOp_V_CEIL_F32,
|
|
VOP3AOp.V_RNDNE_F32: _VOP3AOp_V_RNDNE_F32,
|
|
VOP3AOp.V_FLOOR_F32: _VOP3AOp_V_FLOOR_F32,
|
|
VOP3AOp.V_EXP_F32: _VOP3AOp_V_EXP_F32,
|
|
VOP3AOp.V_LOG_F32: _VOP3AOp_V_LOG_F32,
|
|
VOP3AOp.V_RCP_F32: _VOP3AOp_V_RCP_F32,
|
|
VOP3AOp.V_RCP_IFLAG_F32: _VOP3AOp_V_RCP_IFLAG_F32,
|
|
VOP3AOp.V_RSQ_F32: _VOP3AOp_V_RSQ_F32,
|
|
VOP3AOp.V_RCP_F64: _VOP3AOp_V_RCP_F64,
|
|
VOP3AOp.V_RSQ_F64: _VOP3AOp_V_RSQ_F64,
|
|
VOP3AOp.V_SQRT_F32: _VOP3AOp_V_SQRT_F32,
|
|
VOP3AOp.V_SQRT_F64: _VOP3AOp_V_SQRT_F64,
|
|
VOP3AOp.V_SIN_F32: _VOP3AOp_V_SIN_F32,
|
|
VOP3AOp.V_COS_F32: _VOP3AOp_V_COS_F32,
|
|
VOP3AOp.V_NOT_B32: _VOP3AOp_V_NOT_B32,
|
|
VOP3AOp.V_BFREV_B32: _VOP3AOp_V_BFREV_B32,
|
|
VOP3AOp.V_FFBH_U32: _VOP3AOp_V_FFBH_U32,
|
|
VOP3AOp.V_FFBL_B32: _VOP3AOp_V_FFBL_B32,
|
|
VOP3AOp.V_FFBH_I32: _VOP3AOp_V_FFBH_I32,
|
|
VOP3AOp.V_FREXP_EXP_I32_F64: _VOP3AOp_V_FREXP_EXP_I32_F64,
|
|
VOP3AOp.V_FREXP_MANT_F64: _VOP3AOp_V_FREXP_MANT_F64,
|
|
VOP3AOp.V_FRACT_F64: _VOP3AOp_V_FRACT_F64,
|
|
VOP3AOp.V_FREXP_EXP_I32_F32: _VOP3AOp_V_FREXP_EXP_I32_F32,
|
|
VOP3AOp.V_FREXP_MANT_F32: _VOP3AOp_V_FREXP_MANT_F32,
|
|
VOP3AOp.V_MOV_B64: _VOP3AOp_V_MOV_B64,
|
|
VOP3AOp.V_CVT_F16_U16: _VOP3AOp_V_CVT_F16_U16,
|
|
VOP3AOp.V_CVT_F16_I16: _VOP3AOp_V_CVT_F16_I16,
|
|
VOP3AOp.V_CVT_U16_F16: _VOP3AOp_V_CVT_U16_F16,
|
|
VOP3AOp.V_CVT_I16_F16: _VOP3AOp_V_CVT_I16_F16,
|
|
VOP3AOp.V_RCP_F16: _VOP3AOp_V_RCP_F16,
|
|
VOP3AOp.V_SQRT_F16: _VOP3AOp_V_SQRT_F16,
|
|
VOP3AOp.V_RSQ_F16: _VOP3AOp_V_RSQ_F16,
|
|
VOP3AOp.V_LOG_F16: _VOP3AOp_V_LOG_F16,
|
|
VOP3AOp.V_EXP_F16: _VOP3AOp_V_EXP_F16,
|
|
VOP3AOp.V_CNDMASK_B32: _VOP3AOp_V_CNDMASK_B32,
|
|
VOP3AOp.V_ADD_F32: _VOP3AOp_V_ADD_F32,
|
|
VOP3AOp.V_SUB_F32: _VOP3AOp_V_SUB_F32,
|
|
VOP3AOp.V_SUBREV_F32: _VOP3AOp_V_SUBREV_F32,
|
|
VOP3AOp.V_FMAC_F64: _VOP3AOp_V_FMAC_F64,
|
|
VOP3AOp.V_MUL_F32: _VOP3AOp_V_MUL_F32,
|
|
VOP3AOp.V_MUL_I32_I24: _VOP3AOp_V_MUL_I32_I24,
|
|
VOP3AOp.V_MUL_HI_I32_I24: _VOP3AOp_V_MUL_HI_I32_I24,
|
|
VOP3AOp.V_MUL_U32_U24: _VOP3AOp_V_MUL_U32_U24,
|
|
VOP3AOp.V_MUL_HI_U32_U24: _VOP3AOp_V_MUL_HI_U32_U24,
|
|
VOP3AOp.V_MIN_F32: _VOP3AOp_V_MIN_F32,
|
|
VOP3AOp.V_MAX_F32: _VOP3AOp_V_MAX_F32,
|
|
VOP3AOp.V_MIN_I32: _VOP3AOp_V_MIN_I32,
|
|
VOP3AOp.V_MAX_I32: _VOP3AOp_V_MAX_I32,
|
|
VOP3AOp.V_MIN_U32: _VOP3AOp_V_MIN_U32,
|
|
VOP3AOp.V_MAX_U32: _VOP3AOp_V_MAX_U32,
|
|
VOP3AOp.V_LSHRREV_B32: _VOP3AOp_V_LSHRREV_B32,
|
|
VOP3AOp.V_ASHRREV_I32: _VOP3AOp_V_ASHRREV_I32,
|
|
VOP3AOp.V_LSHLREV_B32: _VOP3AOp_V_LSHLREV_B32,
|
|
VOP3AOp.V_AND_B32: _VOP3AOp_V_AND_B32,
|
|
VOP3AOp.V_OR_B32: _VOP3AOp_V_OR_B32,
|
|
VOP3AOp.V_XOR_B32: _VOP3AOp_V_XOR_B32,
|
|
VOP3AOp.V_ADD_F16: _VOP3AOp_V_ADD_F16,
|
|
VOP3AOp.V_SUB_F16: _VOP3AOp_V_SUB_F16,
|
|
VOP3AOp.V_SUBREV_F16: _VOP3AOp_V_SUBREV_F16,
|
|
VOP3AOp.V_MUL_F16: _VOP3AOp_V_MUL_F16,
|
|
VOP3AOp.V_MAC_F16: _VOP3AOp_V_MAC_F16,
|
|
VOP3AOp.V_ADD_U16: _VOP3AOp_V_ADD_U16,
|
|
VOP3AOp.V_SUB_U16: _VOP3AOp_V_SUB_U16,
|
|
VOP3AOp.V_SUBREV_U16: _VOP3AOp_V_SUBREV_U16,
|
|
VOP3AOp.V_MUL_LO_U16: _VOP3AOp_V_MUL_LO_U16,
|
|
VOP3AOp.V_LSHLREV_B16: _VOP3AOp_V_LSHLREV_B16,
|
|
VOP3AOp.V_LSHRREV_B16: _VOP3AOp_V_LSHRREV_B16,
|
|
VOP3AOp.V_ASHRREV_I16: _VOP3AOp_V_ASHRREV_I16,
|
|
VOP3AOp.V_MAX_F16: _VOP3AOp_V_MAX_F16,
|
|
VOP3AOp.V_MIN_F16: _VOP3AOp_V_MIN_F16,
|
|
VOP3AOp.V_MAX_U16: _VOP3AOp_V_MAX_U16,
|
|
VOP3AOp.V_MAX_I16: _VOP3AOp_V_MAX_I16,
|
|
VOP3AOp.V_MIN_U16: _VOP3AOp_V_MIN_U16,
|
|
VOP3AOp.V_MIN_I16: _VOP3AOp_V_MIN_I16,
|
|
VOP3AOp.V_LDEXP_F16: _VOP3AOp_V_LDEXP_F16,
|
|
VOP3AOp.V_ADD_U32: _VOP3AOp_V_ADD_U32,
|
|
VOP3AOp.V_SUB_U32: _VOP3AOp_V_SUB_U32,
|
|
VOP3AOp.V_SUBREV_U32: _VOP3AOp_V_SUBREV_U32,
|
|
VOP3AOp.V_DOT2C_F32_F16: _VOP3AOp_V_DOT2C_F32_F16,
|
|
VOP3AOp.V_DOT2C_I32_I16: _VOP3AOp_V_DOT2C_I32_I16,
|
|
VOP3AOp.V_DOT4C_I32_I8: _VOP3AOp_V_DOT4C_I32_I8,
|
|
VOP3AOp.V_DOT8C_I32_I4: _VOP3AOp_V_DOT8C_I32_I4,
|
|
VOP3AOp.V_FMAC_F32: _VOP3AOp_V_FMAC_F32,
|
|
VOP3AOp.V_PK_FMAC_F16: _VOP3AOp_V_PK_FMAC_F16,
|
|
VOP3AOp.V_XNOR_B32: _VOP3AOp_V_XNOR_B32,
|
|
VOP3AOp.V_MAD_I32_I24: _VOP3AOp_V_MAD_I32_I24,
|
|
VOP3AOp.V_MAD_U32_U24: _VOP3AOp_V_MAD_U32_U24,
|
|
VOP3AOp.V_CUBEID_F32: _VOP3AOp_V_CUBEID_F32,
|
|
VOP3AOp.V_CUBESC_F32: _VOP3AOp_V_CUBESC_F32,
|
|
VOP3AOp.V_CUBETC_F32: _VOP3AOp_V_CUBETC_F32,
|
|
VOP3AOp.V_CUBEMA_F32: _VOP3AOp_V_CUBEMA_F32,
|
|
VOP3AOp.V_BFE_U32: _VOP3AOp_V_BFE_U32,
|
|
VOP3AOp.V_BFE_I32: _VOP3AOp_V_BFE_I32,
|
|
VOP3AOp.V_BFI_B32: _VOP3AOp_V_BFI_B32,
|
|
VOP3AOp.V_FMA_F32: _VOP3AOp_V_FMA_F32,
|
|
VOP3AOp.V_FMA_F64: _VOP3AOp_V_FMA_F64,
|
|
VOP3AOp.V_LERP_U8: _VOP3AOp_V_LERP_U8,
|
|
VOP3AOp.V_ALIGNBIT_B32: _VOP3AOp_V_ALIGNBIT_B32,
|
|
VOP3AOp.V_ALIGNBYTE_B32: _VOP3AOp_V_ALIGNBYTE_B32,
|
|
VOP3AOp.V_MIN3_F32: _VOP3AOp_V_MIN3_F32,
|
|
VOP3AOp.V_MIN3_I32: _VOP3AOp_V_MIN3_I32,
|
|
VOP3AOp.V_MIN3_U32: _VOP3AOp_V_MIN3_U32,
|
|
VOP3AOp.V_MAX3_F32: _VOP3AOp_V_MAX3_F32,
|
|
VOP3AOp.V_MAX3_I32: _VOP3AOp_V_MAX3_I32,
|
|
VOP3AOp.V_MAX3_U32: _VOP3AOp_V_MAX3_U32,
|
|
VOP3AOp.V_MED3_F32: _VOP3AOp_V_MED3_F32,
|
|
VOP3AOp.V_MED3_I32: _VOP3AOp_V_MED3_I32,
|
|
VOP3AOp.V_MED3_U32: _VOP3AOp_V_MED3_U32,
|
|
VOP3AOp.V_SAD_U8: _VOP3AOp_V_SAD_U8,
|
|
VOP3AOp.V_SAD_HI_U8: _VOP3AOp_V_SAD_HI_U8,
|
|
VOP3AOp.V_SAD_U16: _VOP3AOp_V_SAD_U16,
|
|
VOP3AOp.V_SAD_U32: _VOP3AOp_V_SAD_U32,
|
|
VOP3AOp.V_CVT_PK_U8_F32: _VOP3AOp_V_CVT_PK_U8_F32,
|
|
VOP3AOp.V_DIV_FIXUP_F32: _VOP3AOp_V_DIV_FIXUP_F32,
|
|
VOP3AOp.V_DIV_FIXUP_F64: _VOP3AOp_V_DIV_FIXUP_F64,
|
|
VOP3AOp.V_DIV_FMAS_F32: _VOP3AOp_V_DIV_FMAS_F32,
|
|
VOP3AOp.V_DIV_FMAS_F64: _VOP3AOp_V_DIV_FMAS_F64,
|
|
VOP3AOp.V_MSAD_U8: _VOP3AOp_V_MSAD_U8,
|
|
VOP3AOp.V_QSAD_PK_U16_U8: _VOP3AOp_V_QSAD_PK_U16_U8,
|
|
VOP3AOp.V_MQSAD_PK_U16_U8: _VOP3AOp_V_MQSAD_PK_U16_U8,
|
|
VOP3AOp.V_MQSAD_U32_U8: _VOP3AOp_V_MQSAD_U32_U8,
|
|
VOP3AOp.V_MAD_LEGACY_F16: _VOP3AOp_V_MAD_LEGACY_F16,
|
|
VOP3AOp.V_MAD_LEGACY_U16: _VOP3AOp_V_MAD_LEGACY_U16,
|
|
VOP3AOp.V_MAD_LEGACY_I16: _VOP3AOp_V_MAD_LEGACY_I16,
|
|
VOP3AOp.V_PERM_B32: _VOP3AOp_V_PERM_B32,
|
|
VOP3AOp.V_FMA_LEGACY_F16: _VOP3AOp_V_FMA_LEGACY_F16,
|
|
VOP3AOp.V_DIV_FIXUP_LEGACY_F16: _VOP3AOp_V_DIV_FIXUP_LEGACY_F16,
|
|
VOP3AOp.V_CVT_PKACCUM_U8_F32: _VOP3AOp_V_CVT_PKACCUM_U8_F32,
|
|
VOP3AOp.V_MAD_U32_U16: _VOP3AOp_V_MAD_U32_U16,
|
|
VOP3AOp.V_MAD_I32_I16: _VOP3AOp_V_MAD_I32_I16,
|
|
VOP3AOp.V_XAD_U32: _VOP3AOp_V_XAD_U32,
|
|
VOP3AOp.V_MIN3_F16: _VOP3AOp_V_MIN3_F16,
|
|
VOP3AOp.V_MIN3_I16: _VOP3AOp_V_MIN3_I16,
|
|
VOP3AOp.V_MIN3_U16: _VOP3AOp_V_MIN3_U16,
|
|
VOP3AOp.V_MAX3_F16: _VOP3AOp_V_MAX3_F16,
|
|
VOP3AOp.V_MAX3_I16: _VOP3AOp_V_MAX3_I16,
|
|
VOP3AOp.V_MAX3_U16: _VOP3AOp_V_MAX3_U16,
|
|
VOP3AOp.V_MED3_F16: _VOP3AOp_V_MED3_F16,
|
|
VOP3AOp.V_MED3_I16: _VOP3AOp_V_MED3_I16,
|
|
VOP3AOp.V_MED3_U16: _VOP3AOp_V_MED3_U16,
|
|
VOP3AOp.V_LSHL_ADD_U32: _VOP3AOp_V_LSHL_ADD_U32,
|
|
VOP3AOp.V_ADD_LSHL_U32: _VOP3AOp_V_ADD_LSHL_U32,
|
|
VOP3AOp.V_ADD3_U32: _VOP3AOp_V_ADD3_U32,
|
|
VOP3AOp.V_LSHL_OR_B32: _VOP3AOp_V_LSHL_OR_B32,
|
|
VOP3AOp.V_AND_OR_B32: _VOP3AOp_V_AND_OR_B32,
|
|
VOP3AOp.V_OR3_B32: _VOP3AOp_V_OR3_B32,
|
|
VOP3AOp.V_MAD_F16: _VOP3AOp_V_MAD_F16,
|
|
VOP3AOp.V_MAD_U16: _VOP3AOp_V_MAD_U16,
|
|
VOP3AOp.V_MAD_I16: _VOP3AOp_V_MAD_I16,
|
|
VOP3AOp.V_FMA_F16: _VOP3AOp_V_FMA_F16,
|
|
VOP3AOp.V_DIV_FIXUP_F16: _VOP3AOp_V_DIV_FIXUP_F16,
|
|
VOP3AOp.V_LSHL_ADD_U64: _VOP3AOp_V_LSHL_ADD_U64,
|
|
VOP3AOp.V_ADD_F64: _VOP3AOp_V_ADD_F64,
|
|
VOP3AOp.V_MUL_F64: _VOP3AOp_V_MUL_F64,
|
|
VOP3AOp.V_MIN_F64: _VOP3AOp_V_MIN_F64,
|
|
VOP3AOp.V_MAX_F64: _VOP3AOp_V_MAX_F64,
|
|
VOP3AOp.V_LDEXP_F64: _VOP3AOp_V_LDEXP_F64,
|
|
VOP3AOp.V_MUL_LO_U32: _VOP3AOp_V_MUL_LO_U32,
|
|
VOP3AOp.V_MUL_HI_U32: _VOP3AOp_V_MUL_HI_U32,
|
|
VOP3AOp.V_MUL_HI_I32: _VOP3AOp_V_MUL_HI_I32,
|
|
VOP3AOp.V_LDEXP_F32: _VOP3AOp_V_LDEXP_F32,
|
|
VOP3AOp.V_READLANE_B32: _VOP3AOp_V_READLANE_B32,
|
|
VOP3AOp.V_BCNT_U32_B32: _VOP3AOp_V_BCNT_U32_B32,
|
|
VOP3AOp.V_LSHLREV_B64: _VOP3AOp_V_LSHLREV_B64,
|
|
VOP3AOp.V_LSHRREV_B64: _VOP3AOp_V_LSHRREV_B64,
|
|
VOP3AOp.V_ASHRREV_I64: _VOP3AOp_V_ASHRREV_I64,
|
|
VOP3AOp.V_TRIG_PREOP_F64: _VOP3AOp_V_TRIG_PREOP_F64,
|
|
VOP3AOp.V_BFM_B32: _VOP3AOp_V_BFM_B32,
|
|
VOP3AOp.V_CVT_PKNORM_I16_F32: _VOP3AOp_V_CVT_PKNORM_I16_F32,
|
|
VOP3AOp.V_CVT_PKNORM_U16_F32: _VOP3AOp_V_CVT_PKNORM_U16_F32,
|
|
VOP3AOp.V_CVT_PKRTZ_F16_F32: _VOP3AOp_V_CVT_PKRTZ_F16_F32,
|
|
VOP3AOp.V_CVT_PK_U16_U32: _VOP3AOp_V_CVT_PK_U16_U32,
|
|
VOP3AOp.V_CVT_PK_I16_I32: _VOP3AOp_V_CVT_PK_I16_I32,
|
|
VOP3AOp.V_CVT_PKNORM_I16_F16: _VOP3AOp_V_CVT_PKNORM_I16_F16,
|
|
VOP3AOp.V_CVT_PKNORM_U16_F16: _VOP3AOp_V_CVT_PKNORM_U16_F16,
|
|
VOP3AOp.V_ADD_I32: _VOP3AOp_V_ADD_I32,
|
|
VOP3AOp.V_SUB_I32: _VOP3AOp_V_SUB_I32,
|
|
VOP3AOp.V_ADD_I16: _VOP3AOp_V_ADD_I16,
|
|
VOP3AOp.V_SUB_I16: _VOP3AOp_V_SUB_I16,
|
|
VOP3AOp.V_PACK_B32_F16: _VOP3AOp_V_PACK_B32_F16,
|
|
VOP3AOp.V_MUL_LEGACY_F32: _VOP3AOp_V_MUL_LEGACY_F32,
|
|
VOP3AOp.V_DOT2C_F32_BF16: _VOP3AOp_V_DOT2C_F32_BF16,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_F32_FP8: _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_F32_BF8: _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8,
|
|
VOP3AOp.V_CVT_SCALEF32_F32_FP8: _VOP3AOp_V_CVT_SCALEF32_F32_FP8,
|
|
VOP3AOp.V_CVT_SCALEF32_F32_BF8: _VOP3AOp_V_CVT_SCALEF32_F32_BF8,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_F32_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_F16_FP8: _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_F16_BF8: _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8,
|
|
VOP3AOp.V_CVT_SCALEF32_F16_FP8: _VOP3AOp_V_CVT_SCALEF32_F16_FP8,
|
|
VOP3AOp.V_CVT_SCALEF32_F16_BF8: _VOP3AOp_V_CVT_SCALEF32_F16_BF8,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_F16_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_BF16_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4,
|
|
VOP3AOp.V_ASHR_PK_I8_I32: _VOP3AOp_V_ASHR_PK_I8_I32,
|
|
VOP3AOp.V_ASHR_PK_U8_I32: _VOP3AOp_V_ASHR_PK_U8_I32,
|
|
VOP3AOp.V_CVT_PK_F16_F32: _VOP3AOp_V_CVT_PK_F16_F32,
|
|
VOP3AOp.V_CVT_PK_BF16_F32: _VOP3AOp_V_CVT_PK_BF16_F32,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_BF16_FP8: _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8,
|
|
VOP3AOp.V_CVT_SCALEF32_PK_BF16_BF8: _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8,
|
|
VOP3AOp.V_MINIMUM3_F32: _VOP3AOp_V_MINIMUM3_F32,
|
|
VOP3AOp.V_MAXIMUM3_F32: _VOP3AOp_V_MAXIMUM3_F32,
|
|
}
|
|
|
|
def _VOP3BOp_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg((S0.u32) + (S1.u32))
|
|
VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP3BOp_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.u32 - S1.u32)
|
|
VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP3BOp_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S1.u32 - S0.u32)
|
|
VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP3BOp_V_ADDC_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId])
|
|
VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP3BOp_V_SUBB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId])
|
|
VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
|
|
|
|
def _VOP3BOp_V_SUBBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId])
|
|
VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0))
|
|
D0.u32 = tmp.u32
|
|
return {'D0': D0, 'VCC': VCC}
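
# Illustrative sketch (not part of the autogenerated table): the VOP3B carry ops above
# compute a 33-bit result and write the carry/borrow for this lane into VCC. With plain
# ints the carry-out of an add is simply "did the sum reach 2**32", and the borrow of a
# subtract is "was the subtrahend (plus carry-in) larger than the minuend".
def _example_add_co_u32(a, b, cin=0):
  s = a + b + cin
  return s & 0xffffffff, 1 if s >= 0x100000000 else 0   # (result, carry-out)
def _example_sub_co_u32(a, b, bin_=0):
  return (a - b - bin_) & 0xffffffff, 1 if b + bin_ > a else 0  # (result, borrow-out)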
|
|
|
|
def _VOP3BOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0 = Reg(S0._val)
|
|
# --- compiled pseudocode ---
|
|
VCC = Reg(0x0)
|
|
if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)):
|
|
VCC = Reg(0x1); D0.f32 = float("nan")
|
|
elif exponent(S2.f32) - exponent(S1.f32) >= 96:
|
|
VCC = Reg(0x1)
|
|
if S0.f32 == S1.f32:
|
|
D0.f32 = ldexp(S0.f32, 64)
|
|
elif False:
|
|
pass
|
|
elif ((1.0 / F(S1.f32) == DENORM.f64) and (S2.f32 / S1.f32 == DENORM.f32)):
|
|
VCC = Reg(0x1)
|
|
if S0.f32 == S1.f32:
|
|
D0.f32 = ldexp(S0.f32, 64)
|
|
elif 1.0 / F(S1.f32) == DENORM.f64:
|
|
D0.f32 = ldexp(S0.f32, -64)
|
|
elif S2.f32 / S1.f32 == DENORM.f32:
|
|
VCC = Reg(0x1)
|
|
elif exponent(S2.f32) <= 23:
|
|
VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64)
|
|
if S1.f32 == DENORM.f32:
|
|
D0.f32 = float("nan")
|
|
return {'D0': D0}
|
|
|
|
def _VOP3BOp_V_DIV_SCALE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
|
|
D0 = Reg(S0._val)
|
|
# --- compiled pseudocode ---
|
|
VCC = Reg(0x0)
|
|
if ((S2.f64 == 0.0) or (S1.f64 == 0.0)):
|
|
VCC = Reg(0x1); D0.f64 = float("nan")
|
|
elif exponent(S2.f64) - exponent(S1.f64) >= 768:
|
|
VCC = Reg(0x1)
|
|
if S0.f64 == S1.f64:
|
|
D0.f64 = ldexp(S0.f64, 128)
|
|
elif False:
|
|
pass
|
|
elif ((1.0 / S1.f64 == DENORM.f64) and (S2.f64 / S1.f64 == DENORM.f64)):
|
|
VCC = Reg(0x1)
|
|
if S0.f64 == S1.f64:
|
|
D0.f64 = ldexp(S0.f64, 128)
|
|
elif 1.0 / S1.f64 == DENORM.f64:
|
|
D0.f64 = ldexp(S0.f64, -128)
|
|
elif S2.f64 / S1.f64 == DENORM.f64:
|
|
VCC = Reg(0x1)
|
|
elif exponent(S2.f64) <= 53:
|
|
D0.f64 = ldexp(S0.f64, 128)
|
|
if S1.f64 == DENORM.f64:
|
|
D0.f64 = float("nan")
|
|
return {'D0': D0}
|
|
|
|
def _VOP3BOp_V_MAD_U64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D1 = Reg(0)
  # --- compiled pseudocode ---
  _full = ((S0.u32) * (S1.u32) + (S2.u64))
  D0.u64 = int(_full) & 0xffffffffffffffff
  D1 = Reg((int(_full) >> 64) & 1)
  return {'D0': D0, 'D1': D1}

def _VOP3BOp_V_MAD_I64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
  D1 = Reg(0)
  # --- compiled pseudocode ---
  _full = ((S0.i32) * (S1.i32) + (S2.i64))
  D0.u64 = int(_full) & 0xffffffffffffffff
  D1 = Reg((int(_full) >> 64) & 1)
  return {'D0': D0, 'D1': D1}
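
# Illustrative sketch (not part of the autogenerated table): V_MAD_U64_U32 above forms
# the full 32x32+64 product-sum, keeps the low 64 bits in D0 and reports bit 64 as the
# carry-out. Plain-int equivalent:
def _example_mad_u64_u32(a, b, c):
  full = a * b + c
  return full & 0xffffffffffffffff, (full >> 64) & 1  # (D0, carry-out bit)
# e.g. _example_mad_u64_u32(0xffffffff, 0xffffffff, 0xffffffffffffffff) == (0xfffffffe00000000, 1)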
|
|
|
|
VOP3BOp_FUNCTIONS = {
|
|
VOP3BOp.V_ADD_CO_U32: _VOP3BOp_V_ADD_CO_U32,
|
|
VOP3BOp.V_SUB_CO_U32: _VOP3BOp_V_SUB_CO_U32,
|
|
VOP3BOp.V_SUBREV_CO_U32: _VOP3BOp_V_SUBREV_CO_U32,
|
|
VOP3BOp.V_ADDC_CO_U32: _VOP3BOp_V_ADDC_CO_U32,
|
|
VOP3BOp.V_SUBB_CO_U32: _VOP3BOp_V_SUBB_CO_U32,
|
|
VOP3BOp.V_SUBBREV_CO_U32: _VOP3BOp_V_SUBBREV_CO_U32,
|
|
VOP3BOp.V_DIV_SCALE_F32: _VOP3BOp_V_DIV_SCALE_F32,
|
|
VOP3BOp.V_DIV_SCALE_F64: _VOP3BOp_V_DIV_SCALE_F64,
|
|
VOP3BOp.V_MAD_U64_U32: _VOP3BOp_V_MAD_U64_U32,
|
|
VOP3BOp.V_MAD_I64_I32: _VOP3BOp_V_MAD_I64_I32,
|
|
}
|
|
|
|
def _DSOp_DS_ADD_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 += DATA.u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_SUB_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 -= DATA.u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_RSUB_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 = DATA.u32 - MEM[addr].u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_INC_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
  DATA = DATA0
  OFFSET = OFFSET0
  # --- compiled pseudocode ---
  addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
  tmp = Reg(MEM[addr].u32)
  src = DATA.u32
  MEM[addr].u32 = ((0) if (tmp >= src) else (tmp + 1))
  RETURN_DATA.u32 = tmp
  return {'RETURN_DATA': RETURN_DATA}

def _DSOp_DS_DEC_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
  DATA = DATA0
  OFFSET = OFFSET0
  # --- compiled pseudocode ---
  addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
  tmp = Reg(MEM[addr].u32)
  src = DATA.u32
  MEM[addr].u32 = ((src) if (((tmp == 0) or (tmp > src))) else (tmp - 1))
  RETURN_DATA.u32 = tmp
  return {'RETURN_DATA': RETURN_DATA}
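
# Illustrative sketch (not part of the autogenerated table): DS_INC_U32 / DS_DEC_U32
# above are wrapping atomics: INC resets the stored value to 0 once it reaches the
# operand, and DEC reloads the operand when the stored value is 0 or already above it.
def _example_ds_inc_u32(mem_val, src): return 0 if mem_val >= src else mem_val + 1
def _example_ds_dec_u32(mem_val, src): return src if (mem_val == 0 or mem_val > src) else mem_val - 1
# e.g. a counter bounded by src=3 cycles 0, 1, 2, 3, 0, ... under _example_ds_inc_u32.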
|
|
|
|
def _DSOp_DS_MIN_I32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].i32)
|
|
src = DATA.i32
|
|
MEM[addr].i32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.i32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_I32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].i32)
|
|
src = DATA.i32
|
|
MEM[addr].i32 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.i32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_AND_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp & DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_OR_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp | DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_XOR_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp ^ DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MSKOR_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = ((tmp & ~DATA.b32) | DATA2.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRITE_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET.u32].b32 = DATA[31 : 0]
|
|
return {}
|
|
|
|
def _DSOp_DS_WRITE2_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET0.u32 * 4].b32 = DATA[31 : 0]
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET1.u32 * 4].b32 = DATA2[31 : 0]
|
|
return {}
|
|
|
|
def _DSOp_DS_WRITE2ST64_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET0.u32 * 256].b32 = DATA[31 : 0]
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET1.u32 * 256].b32 = DATA2[31 : 0]
|
|
return {}
|
|
|
|
def _DSOp_DS_CMPST_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
src = DATA2.b32
|
|
cmp = DATA.b32
|
|
MEM[addr].b32 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
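
# Illustrative sketch (not part of the autogenerated table): DS_CMPST_B32 above is a
# compare-and-store: DATA0 carries the compare value, DATA1 the new value, and the old
# memory contents are always returned.
def _example_ds_cmpst_b32(mem_val, cmp_val, new_val):
  return (new_val if mem_val == cmp_val else mem_val), mem_val  # (stored value, returned old value)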
|
|
|
|
def _DSOp_DS_CMPST_F32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f32)
|
|
src = DATA2.f32
|
|
cmp = DATA.f32
|
|
MEM[addr].f32 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.f32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_F32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f32)
|
|
src = DATA.f32
|
|
MEM[addr].f32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.f32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_F32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f32)
|
|
src = DATA.f32
|
|
MEM[addr].f32 = ((src) if (src > tmp) else (tmp))
|
|
RETURN_DATA.f32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_ADD_F32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f32)
|
|
MEM[addr].f32 += DATA.f32
|
|
RETURN_DATA.f32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_PK_ADD_F16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
  DATA = DATA0
  dst = Reg(0)  # assumed initialization: the extracted pseudocode uses dst before defining it
  # --- compiled pseudocode ---
  tmp = Reg(MEM[ADDR])
  src = DATA
  dst[31 : 16].f16 = tmp[31 : 16].f16 + src[31 : 16].f16
  dst[15 : 0].f16 = tmp[15 : 0].f16 + src[15 : 0].f16
  MEM[ADDR] = dst.b32
  RETURN_DATA.b32 = tmp.b32  # assumed: return the pre-op value like the other DS atomics
  return {'RETURN_DATA': RETURN_DATA}

def _DSOp_DS_PK_ADD_BF16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
  DATA = DATA0
  dst = Reg(0)  # assumed initialization, as above
  # --- compiled pseudocode ---
  tmp = Reg(MEM[ADDR])
  src = DATA
  dst[31 : 16].bf16 = tmp[31 : 16].bf16 + src[31 : 16].bf16
  dst[15 : 0].bf16 = tmp[15 : 0].bf16 + src[15 : 0].bf16
  MEM[ADDR] = dst.b32
  RETURN_DATA.b32 = tmp.b32  # assumed, as above
  return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRITE_B8(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
MEM[ADDR].b8 = DATA[7 : 0]
|
|
return {}
|
|
|
|
def _DSOp_DS_WRITE_B16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
MEM[ADDR].b16 = DATA[15 : 0]
|
|
return {}
|
|
|
|
def _DSOp_DS_ADD_RTN_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 += DATA.u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_SUB_RTN_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 -= DATA.u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_RSUB_RTN_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 = DATA.u32 - MEM[addr].u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_INC_RTN_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((0) if (tmp >= src) else (tmp + 1))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_DEC_RTN_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (((tmp == 0) or (tmp > src))) else (tmp - 1))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
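# DS_INC_RTN_U32 / DS_DEC_RTN_U32 above implement wrapping counters bounded by DATA:
# with src = 3, INC steps 0 -> 1 -> 2 -> 3 -> 0 (reset once tmp >= src), while
# DEC steps 3 -> 2 -> 1 -> 0 -> 3 (reloaded with src once tmp == 0 or tmp > src).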
|
|
|
|
def _DSOp_DS_MIN_RTN_I32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].i32)
|
|
src = DATA.i32
|
|
MEM[addr].i32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.i32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_RTN_I32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].i32)
|
|
src = DATA.i32
|
|
MEM[addr].i32 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.i32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_RTN_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_RTN_U32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_AND_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp & DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_OR_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp | DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_XOR_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp ^ DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MSKOR_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = ((tmp & ~DATA.b32) | DATA2.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRXCHG_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = DATA.b32
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRXCHG2_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
ADDR_BASE = ADDR
|
|
# --- compiled pseudocode ---
|
|
addr1 = ADDR_BASE.u32 + OFFSET0.u32 * 4
|
|
addr2 = ADDR_BASE.u32 + OFFSET1.u32 * 4
|
|
tmp1 = MEM[addr1].b32
|
|
tmp2 = MEM[addr2].b32
|
|
MEM[addr1].b32 = DATA.b32
|
|
MEM[addr2].b32 = DATA2.b32
|
|
RETURN_DATA[31 : 0] = tmp1
|
|
RETURN_DATA[63 : 32] = tmp2
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRXCHG2ST64_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
ADDR_BASE = ADDR
|
|
# --- compiled pseudocode ---
|
|
addr1 = ADDR_BASE.u32 + OFFSET0.u32 * 256
|
|
addr2 = ADDR_BASE.u32 + OFFSET1.u32 * 256
|
|
tmp1 = MEM[addr1].b32
|
|
tmp2 = MEM[addr2].b32
|
|
MEM[addr1].b32 = DATA.b32
|
|
MEM[addr2].b32 = DATA2.b32
|
|
RETURN_DATA[31 : 0] = tmp1
|
|
RETURN_DATA[63 : 32] = tmp2
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_CMPST_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
src = DATA2.b32
|
|
cmp = DATA.b32
|
|
MEM[addr].b32 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_CMPST_RTN_F32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f32)
|
|
src = DATA2.f32
|
|
cmp = DATA.f32
|
|
MEM[addr].f32 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.f32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_RTN_F32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f32)
|
|
src = DATA.f32
|
|
MEM[addr].f32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.f32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_RTN_F32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f32)
|
|
src = DATA.f32
|
|
MEM[addr].f32 = ((src) if (src > tmp) else (tmp))
|
|
RETURN_DATA.f32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRAP_RTN_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR].u32)
|
|
MEM[ADDR].u32 = ((tmp - DATA.u32) if (tmp >= DATA.u32) else (tmp + DATA2.u32))
|
|
RETURN_DATA.u32 = tmp
return {'RETURN_DATA': RETURN_DATA}
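# DS_WRAP_RTN_B32 above subtracts DATA when the current value is at least DATA and
# otherwise adds DATA2, keeping a counter wrapped inside a window defined by the two
# operands; the pre-op value is returned.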
|
|
|
|
def _DSOp_DS_ADD_RTN_F32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f32)
|
|
MEM[addr].f32 += DATA.f32
|
|
RETURN_DATA.f32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[31 : 0] = MEM[addr + OFFSET.u32].b32
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ2_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[31 : 0] = MEM[addr + OFFSET0.u32 * 4].b32
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[63 : 32] = MEM[addr + OFFSET1.u32 * 4].b32
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ2ST64_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[31 : 0] = MEM[addr + OFFSET0.u32 * 256].b32
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[63 : 32] = MEM[addr + OFFSET1.u32 * 256].b32
|
|
return {'RETURN_DATA': RETURN_DATA}
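# For the two-address DS ops, OFFSET0/OFFSET1 are element indices: DS_READ2_B32 scales
# them by 4 bytes, DS_READ2ST64_B32 by 256 bytes (a stride of 64 dwords); the 64-bit
# variants use 8 and 512 bytes respectively.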
|
|
|
|
def _DSOp_DS_READ_I8(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA.i32 = (signext(MEM[ADDR].i8))
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_U8(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA.u32 = (_pack(0, MEM[ADDR].u8))
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_I16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA.i32 = (signext(MEM[ADDR].i16))
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_U16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA.u32 = (_pack(0, MEM[ADDR].u16))
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_PERMUTE_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
for i in range(0, int(63)+1):
|
|
tmp[i] = 0x0
|
|
for i in range(0, int(63)+1):
|
|
if EXEC[i].u1:
|
|
dst_lane = (VGPR[i][ADDR].u32 + OFFSET.u32) / 4 % 64
|
|
tmp[dst_lane] = VGPR[i][DATA0]
|
|
for i in range(0, int(63)+1):
|
|
if EXEC[i].u1:
|
|
VGPR[i][VDST] = tmp[i]
|
|
return {}
|
|
|
|
def _DSOp_DS_BPERMUTE_B32(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
for i in range(0, int(63)+1):
|
|
tmp[i] = 0x0
|
|
for i in range(0, int(63)+1):
|
|
src_lane = (VGPR[i][ADDR].u32 + OFFSET.u32) / 4 % 64
|
|
if EXEC[src_lane].u1:
|
|
tmp[i] = VGPR[src_lane][DATA0]
|
|
for i in range(0, int(63)+1):
|
|
if EXEC[i].u1:
|
|
VGPR[i][VDST] = tmp[i]
|
|
return {}
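# DS_PERMUTE_B32 is a forward ("push") permute: each active lane writes its DATA0 to the
# lane selected by its byte address, while DS_BPERMUTE_B32 is a backward ("pull") permute:
# each lane reads DATA0 from the selected source lane. The lane index is the byte address
# divided by 4, modulo the 64-lane wave. tmp/VGPR/EXEC/VDST are not parameters of these
# helpers and are presumably bound by the surrounding emulator.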
|
|
|
|
def _DSOp_DS_ADD_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 += DATA.u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_SUB_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 -= DATA.u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_RSUB_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 = DATA.u64 - MEM[addr].u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_INC_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((0) if (tmp >= src) else (tmp + 1))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_DEC_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (((tmp == 0) or (tmp > src))) else (tmp - 1))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_I64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].i64)
|
|
src = DATA.i64
|
|
MEM[addr].i64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.i64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_I64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].i64)
|
|
src = DATA.i64
|
|
MEM[addr].i64 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.i64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_AND_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp & DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_OR_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp | DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_XOR_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp ^ DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MSKOR_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = ((tmp & ~DATA.b64) | DATA2.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRITE_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET.u32].b32 = DATA[31 : 0]
|
|
MEM[addr + OFFSET.u32 + 4].b32 = DATA[63 : 32]
|
|
return {}
|
|
|
|
def _DSOp_DS_WRITE2_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET0.u32 * 8].b32 = DATA[31 : 0]
|
|
MEM[addr + OFFSET0.u32 * 8 + 4].b32 = DATA[63 : 32]
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET1.u32 * 8].b32 = DATA2[31 : 0]
|
|
MEM[addr + OFFSET1.u32 * 8 + 4].b32 = DATA2[63 : 32]
|
|
return {}
|
|
|
|
def _DSOp_DS_WRITE2ST64_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET0.u32 * 512].b32 = DATA[31 : 0]
|
|
MEM[addr + OFFSET0.u32 * 512 + 4].b32 = DATA[63 : 32]
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET1.u32 * 512].b32 = DATA2[31 : 0]
|
|
MEM[addr + OFFSET1.u32 * 512 + 4].b32 = DATA2[63 : 32]
|
|
return {}
|
|
|
|
def _DSOp_DS_CMPST_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
src = DATA2.b64
|
|
cmp = DATA.b64
|
|
MEM[addr].b64 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_CMPST_F64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA2.f64
|
|
cmp = DATA.f64
|
|
MEM[addr].f64 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_F64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA.f64
|
|
MEM[addr].f64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_F64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA.f64
|
|
MEM[addr].f64 = ((src) if (src > tmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRITE_B8_D16_HI(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
MEM[ADDR].b8 = DATA[23 : 16]
|
|
return {}
|
|
|
|
def _DSOp_DS_WRITE_B16_D16_HI(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
MEM[ADDR].b16 = DATA[31 : 16]
|
|
return {}
|
|
|
|
def _DSOp_DS_READ_U8_D16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA[15 : 0].u16 = (_pack(0, MEM[ADDR].u8))
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_U8_D16_HI(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA[31 : 16].u16 = (_pack(0, MEM[ADDR].u8))
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_I8_D16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA[15 : 0].i16 = (signext(MEM[ADDR].i8))
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_I8_D16_HI(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA[31 : 16].i16 = (signext(MEM[ADDR].i8))
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_U16_D16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA[15 : 0].u16 = MEM[ADDR].u16
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_U16_D16_HI(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
RETURN_DATA[31 : 16].u16 = MEM[ADDR].u16
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_ADD_F64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR].f64)
|
|
MEM[ADDR].f64 += DATA.f64
|
|
RETURN_DATA = tmp
|
|
return {}
|
|
|
|
def _DSOp_DS_ADD_RTN_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 += DATA.u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_SUB_RTN_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 -= DATA.u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_RSUB_RTN_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 = DATA.u64 - MEM[addr].u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_INC_RTN_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((0) if (tmp >= src) else (tmp + 1))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_DEC_RTN_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (((tmp == 0) or (tmp > src))) else (tmp - 1))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_RTN_I64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].i64)
|
|
src = DATA.i64
|
|
MEM[addr].i64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.i64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_RTN_I64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].i64)
|
|
src = DATA.i64
|
|
MEM[addr].i64 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.i64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_RTN_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_RTN_U64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_AND_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp & DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_OR_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp | DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_XOR_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp ^ DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MSKOR_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = ((tmp & ~DATA.b64) | DATA2.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRXCHG_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = DATA.b64
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRXCHG2_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
ADDR_BASE = ADDR
|
|
# --- compiled pseudocode ---
|
|
addr1 = ADDR_BASE.u32 + OFFSET0.u32 * 8
|
|
addr2 = ADDR_BASE.u32 + OFFSET1.u32 * 8
|
|
tmp1 = MEM[addr1].b64
|
|
tmp2 = MEM[addr2].b64
|
|
MEM[addr1].b64 = DATA.b64
|
|
MEM[addr2].b64 = DATA2.b64
|
|
RETURN_DATA[63 : 0] = tmp1
|
|
RETURN_DATA[127 : 64] = tmp2
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRXCHG2ST64_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
ADDR_BASE = ADDR
|
|
# --- compiled pseudocode ---
|
|
addr1 = ADDR_BASE.u32 + OFFSET0.u32 * 512
|
|
addr2 = ADDR_BASE.u32 + OFFSET1.u32 * 512
|
|
tmp1 = MEM[addr1].b64
|
|
tmp2 = MEM[addr2].b64
|
|
MEM[addr1].b64 = DATA.b64
|
|
MEM[addr2].b64 = DATA2.b64
|
|
RETURN_DATA[63 : 0] = tmp1
|
|
RETURN_DATA[127 : 64] = tmp2
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_CMPST_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
src = DATA2.b64
|
|
cmp = DATA.b64
|
|
MEM[addr].b64 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_CMPST_RTN_F64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
DATA2 = DATA1
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA2.f64
|
|
cmp = DATA.f64
|
|
MEM[addr].f64 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MIN_RTN_F64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA.f64
|
|
MEM[addr].f64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_MAX_RTN_F64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA.f64
|
|
MEM[addr].f64 = ((src) if (src > tmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[31 : 0] = MEM[addr + OFFSET.u32].b32
|
|
RETURN_DATA[63 : 32] = MEM[addr + OFFSET.u32 + 4].b32
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ2_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[31 : 0] = MEM[addr + OFFSET0.u32 * 8].b32
|
|
RETURN_DATA[63 : 32] = MEM[addr + OFFSET0.u32 * 8 + 4].b32
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[95 : 64] = MEM[addr + OFFSET1.u32 * 8].b32
|
|
RETURN_DATA[127 : 96] = MEM[addr + OFFSET1.u32 * 8 + 4].b32
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ2ST64_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[31 : 0] = MEM[addr + OFFSET0.u32 * 512].b32
|
|
RETURN_DATA[63 : 32] = MEM[addr + OFFSET0.u32 * 512 + 4].b32
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[95 : 64] = MEM[addr + OFFSET1.u32 * 512].b32
|
|
RETURN_DATA[127 : 96] = MEM[addr + OFFSET1.u32 * 512 + 4].b32
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_ADD_RTN_F64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR].f64)
|
|
MEM[ADDR].f64 += DATA.f64
|
|
RETURN_DATA.f64 = tmp
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_CONDXCHG32_RTN_B64(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
# conditional per-dword exchange: each 32-bit half of DATA is written only when its MSB
# is set, and the old dwords are returned. Addressing the two halves at addr and addr + 4
# via CalcDsAddr is an assumption of this cleanup of the manual's pseudocode.
addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
RETURN_DATA[31 : 0] = MEM[addr].u32
if DATA[31]:
    MEM[addr].b32 = _pack(0, DATA[30 : 0])
RETURN_DATA[63 : 32] = MEM[addr + 4].u32
if DATA[63]:
    MEM[addr + 4].b32 = _pack(0, DATA[62 : 32])
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_PK_ADD_RTN_F16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR])
|
|
src = DATA
|
|
dst = Reg(0)  # packed f16x2 result
dst[31 : 16].f16 = tmp[31 : 16].f16 + src[31 : 16].f16
|
|
dst[15 : 0].f16 = tmp[15 : 0].f16 + src[15 : 0].f16
|
|
MEM[ADDR] = dst.b32
|
|
RETURN_DATA = tmp
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_PK_ADD_RTN_BF16(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR])
|
|
src = DATA
|
|
dst = Reg(0)  # packed bf16x2 result
dst[31 : 16].bf16 = tmp[31 : 16].bf16 + src[31 : 16].bf16
|
|
dst[15 : 0].bf16 = tmp[15 : 0].bf16 + src[15 : 0].bf16
|
|
MEM[ADDR] = dst.b32
|
|
RETURN_DATA = tmp
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_WRITE_B96(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET.u32].b32 = DATA[31 : 0]
|
|
MEM[addr + OFFSET.u32 + 4].b32 = DATA[63 : 32]
|
|
MEM[addr + OFFSET.u32 + 8].b32 = DATA[95 : 64]
|
|
return {}
|
|
|
|
def _DSOp_DS_WRITE_B128(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
MEM[addr + OFFSET.u32].b32 = DATA[31 : 0]
|
|
MEM[addr + OFFSET.u32 + 4].b32 = DATA[63 : 32]
|
|
MEM[addr + OFFSET.u32 + 8].b32 = DATA[95 : 64]
|
|
MEM[addr + OFFSET.u32 + 12].b32 = DATA[127 : 96]
|
|
return {}
|
|
|
|
def _DSOp_DS_READ_B96(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[31 : 0] = MEM[addr + OFFSET.u32].b32
|
|
RETURN_DATA[63 : 32] = MEM[addr + OFFSET.u32 + 4].b32
|
|
RETURN_DATA[95 : 64] = MEM[addr + OFFSET.u32 + 8].b32
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _DSOp_DS_READ_B128(MEM, ADDR, DATA0, DATA1, OFFSET0, OFFSET1, RETURN_DATA):
|
|
DATA = DATA0
|
|
OFFSET = OFFSET0
|
|
# --- compiled pseudocode ---
|
|
addr = CalcDsAddr(ADDR.b32, 0x0, 0x0)
|
|
RETURN_DATA[31 : 0] = MEM[addr + OFFSET.u32].b32
|
|
RETURN_DATA[63 : 32] = MEM[addr + OFFSET.u32 + 4].b32
|
|
RETURN_DATA[95 : 64] = MEM[addr + OFFSET.u32 + 8].b32
|
|
RETURN_DATA[127 : 96] = MEM[addr + OFFSET.u32 + 12].b32
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
DSOp_FUNCTIONS = {
|
|
DSOp.DS_ADD_U32: _DSOp_DS_ADD_U32,
|
|
DSOp.DS_SUB_U32: _DSOp_DS_SUB_U32,
|
|
DSOp.DS_RSUB_U32: _DSOp_DS_RSUB_U32,
|
|
DSOp.DS_INC_U32: _DSOp_DS_INC_U32,
|
|
DSOp.DS_DEC_U32: _DSOp_DS_DEC_U32,
|
|
DSOp.DS_MIN_I32: _DSOp_DS_MIN_I32,
|
|
DSOp.DS_MAX_I32: _DSOp_DS_MAX_I32,
|
|
DSOp.DS_MIN_U32: _DSOp_DS_MIN_U32,
|
|
DSOp.DS_MAX_U32: _DSOp_DS_MAX_U32,
|
|
DSOp.DS_AND_B32: _DSOp_DS_AND_B32,
|
|
DSOp.DS_OR_B32: _DSOp_DS_OR_B32,
|
|
DSOp.DS_XOR_B32: _DSOp_DS_XOR_B32,
|
|
DSOp.DS_MSKOR_B32: _DSOp_DS_MSKOR_B32,
|
|
DSOp.DS_WRITE_B32: _DSOp_DS_WRITE_B32,
|
|
DSOp.DS_WRITE2_B32: _DSOp_DS_WRITE2_B32,
|
|
DSOp.DS_WRITE2ST64_B32: _DSOp_DS_WRITE2ST64_B32,
|
|
DSOp.DS_CMPST_B32: _DSOp_DS_CMPST_B32,
|
|
DSOp.DS_CMPST_F32: _DSOp_DS_CMPST_F32,
|
|
DSOp.DS_MIN_F32: _DSOp_DS_MIN_F32,
|
|
DSOp.DS_MAX_F32: _DSOp_DS_MAX_F32,
|
|
DSOp.DS_ADD_F32: _DSOp_DS_ADD_F32,
|
|
DSOp.DS_PK_ADD_F16: _DSOp_DS_PK_ADD_F16,
|
|
DSOp.DS_PK_ADD_BF16: _DSOp_DS_PK_ADD_BF16,
|
|
DSOp.DS_WRITE_B8: _DSOp_DS_WRITE_B8,
|
|
DSOp.DS_WRITE_B16: _DSOp_DS_WRITE_B16,
|
|
DSOp.DS_ADD_RTN_U32: _DSOp_DS_ADD_RTN_U32,
|
|
DSOp.DS_SUB_RTN_U32: _DSOp_DS_SUB_RTN_U32,
|
|
DSOp.DS_RSUB_RTN_U32: _DSOp_DS_RSUB_RTN_U32,
|
|
DSOp.DS_INC_RTN_U32: _DSOp_DS_INC_RTN_U32,
|
|
DSOp.DS_DEC_RTN_U32: _DSOp_DS_DEC_RTN_U32,
|
|
DSOp.DS_MIN_RTN_I32: _DSOp_DS_MIN_RTN_I32,
|
|
DSOp.DS_MAX_RTN_I32: _DSOp_DS_MAX_RTN_I32,
|
|
DSOp.DS_MIN_RTN_U32: _DSOp_DS_MIN_RTN_U32,
|
|
DSOp.DS_MAX_RTN_U32: _DSOp_DS_MAX_RTN_U32,
|
|
DSOp.DS_AND_RTN_B32: _DSOp_DS_AND_RTN_B32,
|
|
DSOp.DS_OR_RTN_B32: _DSOp_DS_OR_RTN_B32,
|
|
DSOp.DS_XOR_RTN_B32: _DSOp_DS_XOR_RTN_B32,
|
|
DSOp.DS_MSKOR_RTN_B32: _DSOp_DS_MSKOR_RTN_B32,
|
|
DSOp.DS_WRXCHG_RTN_B32: _DSOp_DS_WRXCHG_RTN_B32,
|
|
DSOp.DS_WRXCHG2_RTN_B32: _DSOp_DS_WRXCHG2_RTN_B32,
|
|
DSOp.DS_WRXCHG2ST64_RTN_B32: _DSOp_DS_WRXCHG2ST64_RTN_B32,
|
|
DSOp.DS_CMPST_RTN_B32: _DSOp_DS_CMPST_RTN_B32,
|
|
DSOp.DS_CMPST_RTN_F32: _DSOp_DS_CMPST_RTN_F32,
|
|
DSOp.DS_MIN_RTN_F32: _DSOp_DS_MIN_RTN_F32,
|
|
DSOp.DS_MAX_RTN_F32: _DSOp_DS_MAX_RTN_F32,
|
|
DSOp.DS_WRAP_RTN_B32: _DSOp_DS_WRAP_RTN_B32,
|
|
DSOp.DS_ADD_RTN_F32: _DSOp_DS_ADD_RTN_F32,
|
|
DSOp.DS_READ_B32: _DSOp_DS_READ_B32,
|
|
DSOp.DS_READ2_B32: _DSOp_DS_READ2_B32,
|
|
DSOp.DS_READ2ST64_B32: _DSOp_DS_READ2ST64_B32,
|
|
DSOp.DS_READ_I8: _DSOp_DS_READ_I8,
|
|
DSOp.DS_READ_U8: _DSOp_DS_READ_U8,
|
|
DSOp.DS_READ_I16: _DSOp_DS_READ_I16,
|
|
DSOp.DS_READ_U16: _DSOp_DS_READ_U16,
|
|
DSOp.DS_PERMUTE_B32: _DSOp_DS_PERMUTE_B32,
|
|
DSOp.DS_BPERMUTE_B32: _DSOp_DS_BPERMUTE_B32,
|
|
DSOp.DS_ADD_U64: _DSOp_DS_ADD_U64,
|
|
DSOp.DS_SUB_U64: _DSOp_DS_SUB_U64,
|
|
DSOp.DS_RSUB_U64: _DSOp_DS_RSUB_U64,
|
|
DSOp.DS_INC_U64: _DSOp_DS_INC_U64,
|
|
DSOp.DS_DEC_U64: _DSOp_DS_DEC_U64,
|
|
DSOp.DS_MIN_I64: _DSOp_DS_MIN_I64,
|
|
DSOp.DS_MAX_I64: _DSOp_DS_MAX_I64,
|
|
DSOp.DS_MIN_U64: _DSOp_DS_MIN_U64,
|
|
DSOp.DS_MAX_U64: _DSOp_DS_MAX_U64,
|
|
DSOp.DS_AND_B64: _DSOp_DS_AND_B64,
|
|
DSOp.DS_OR_B64: _DSOp_DS_OR_B64,
|
|
DSOp.DS_XOR_B64: _DSOp_DS_XOR_B64,
|
|
DSOp.DS_MSKOR_B64: _DSOp_DS_MSKOR_B64,
|
|
DSOp.DS_WRITE_B64: _DSOp_DS_WRITE_B64,
|
|
DSOp.DS_WRITE2_B64: _DSOp_DS_WRITE2_B64,
|
|
DSOp.DS_WRITE2ST64_B64: _DSOp_DS_WRITE2ST64_B64,
|
|
DSOp.DS_CMPST_B64: _DSOp_DS_CMPST_B64,
|
|
DSOp.DS_CMPST_F64: _DSOp_DS_CMPST_F64,
|
|
DSOp.DS_MIN_F64: _DSOp_DS_MIN_F64,
|
|
DSOp.DS_MAX_F64: _DSOp_DS_MAX_F64,
|
|
DSOp.DS_WRITE_B8_D16_HI: _DSOp_DS_WRITE_B8_D16_HI,
|
|
DSOp.DS_WRITE_B16_D16_HI: _DSOp_DS_WRITE_B16_D16_HI,
|
|
DSOp.DS_READ_U8_D16: _DSOp_DS_READ_U8_D16,
|
|
DSOp.DS_READ_U8_D16_HI: _DSOp_DS_READ_U8_D16_HI,
|
|
DSOp.DS_READ_I8_D16: _DSOp_DS_READ_I8_D16,
|
|
DSOp.DS_READ_I8_D16_HI: _DSOp_DS_READ_I8_D16_HI,
|
|
DSOp.DS_READ_U16_D16: _DSOp_DS_READ_U16_D16,
|
|
DSOp.DS_READ_U16_D16_HI: _DSOp_DS_READ_U16_D16_HI,
|
|
DSOp.DS_ADD_F64: _DSOp_DS_ADD_F64,
|
|
DSOp.DS_ADD_RTN_U64: _DSOp_DS_ADD_RTN_U64,
|
|
DSOp.DS_SUB_RTN_U64: _DSOp_DS_SUB_RTN_U64,
|
|
DSOp.DS_RSUB_RTN_U64: _DSOp_DS_RSUB_RTN_U64,
|
|
DSOp.DS_INC_RTN_U64: _DSOp_DS_INC_RTN_U64,
|
|
DSOp.DS_DEC_RTN_U64: _DSOp_DS_DEC_RTN_U64,
|
|
DSOp.DS_MIN_RTN_I64: _DSOp_DS_MIN_RTN_I64,
|
|
DSOp.DS_MAX_RTN_I64: _DSOp_DS_MAX_RTN_I64,
|
|
DSOp.DS_MIN_RTN_U64: _DSOp_DS_MIN_RTN_U64,
|
|
DSOp.DS_MAX_RTN_U64: _DSOp_DS_MAX_RTN_U64,
|
|
DSOp.DS_AND_RTN_B64: _DSOp_DS_AND_RTN_B64,
|
|
DSOp.DS_OR_RTN_B64: _DSOp_DS_OR_RTN_B64,
|
|
DSOp.DS_XOR_RTN_B64: _DSOp_DS_XOR_RTN_B64,
|
|
DSOp.DS_MSKOR_RTN_B64: _DSOp_DS_MSKOR_RTN_B64,
|
|
DSOp.DS_WRXCHG_RTN_B64: _DSOp_DS_WRXCHG_RTN_B64,
|
|
DSOp.DS_WRXCHG2_RTN_B64: _DSOp_DS_WRXCHG2_RTN_B64,
|
|
DSOp.DS_WRXCHG2ST64_RTN_B64: _DSOp_DS_WRXCHG2ST64_RTN_B64,
|
|
DSOp.DS_CMPST_RTN_B64: _DSOp_DS_CMPST_RTN_B64,
|
|
DSOp.DS_CMPST_RTN_F64: _DSOp_DS_CMPST_RTN_F64,
|
|
DSOp.DS_MIN_RTN_F64: _DSOp_DS_MIN_RTN_F64,
|
|
DSOp.DS_MAX_RTN_F64: _DSOp_DS_MAX_RTN_F64,
|
|
DSOp.DS_READ_B64: _DSOp_DS_READ_B64,
|
|
DSOp.DS_READ2_B64: _DSOp_DS_READ2_B64,
|
|
DSOp.DS_READ2ST64_B64: _DSOp_DS_READ2ST64_B64,
|
|
DSOp.DS_ADD_RTN_F64: _DSOp_DS_ADD_RTN_F64,
|
|
DSOp.DS_CONDXCHG32_RTN_B64: _DSOp_DS_CONDXCHG32_RTN_B64,
|
|
DSOp.DS_PK_ADD_RTN_F16: _DSOp_DS_PK_ADD_RTN_F16,
|
|
DSOp.DS_PK_ADD_RTN_BF16: _DSOp_DS_PK_ADD_RTN_BF16,
|
|
DSOp.DS_WRITE_B96: _DSOp_DS_WRITE_B96,
|
|
DSOp.DS_WRITE_B128: _DSOp_DS_WRITE_B128,
|
|
DSOp.DS_READ_B96: _DSOp_DS_READ_B96,
|
|
DSOp.DS_READ_B128: _DSOp_DS_READ_B128,
|
|
}
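# DSOp_FUNCTIONS maps each DSOp enum value to its pseudocode helper; an emulator can
# dispatch with e.g. DSOp_FUNCTIONS[DSOp.DS_ADD_RTN_U32](MEM, ADDR, DATA0, DATA1,
# OFFSET0, OFFSET1, RETURN_DATA) and apply the returned dict of written outputs
# (a sketch of the calling convention read off the helper signatures above, not an extra API).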
|
|
|
|
def _FLATOp_FLAT_LOAD_UBYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA.u32 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_SBYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA.i32 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_USHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA.u32 = (_pack(0, MEM[addr].u16))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_SSHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA.i32 = (signext(MEM[addr].i16))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_DWORD(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_DWORDX2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_DWORDX3(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
VDATA[95 : 64] = MEM[addr + 8].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_DWORDX4(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
VDATA[95 : 64] = MEM[addr + 8].b32
|
|
VDATA[127 : 96] = MEM[addr + 12].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_STORE_BYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
MEM[addr].b8 = VDATA[7 : 0]
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_STORE_BYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
MEM[addr].b8 = VDATA[23 : 16]
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_STORE_SHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
MEM[addr].b16 = VDATA[15 : 0]
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_STORE_SHORT_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
MEM[addr].b16 = VDATA[31 : 16]
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_STORE_DWORD(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_STORE_DWORDX2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_STORE_DWORDX3(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
MEM[addr + 8].b32 = VDATA[95 : 64]
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_STORE_DWORDX4(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
MEM[addr + 8].b32 = VDATA[95 : 64]
|
|
MEM[addr + 12].b32 = VDATA[127 : 96]
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_LOAD_UBYTE_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].u16 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_UBYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].u16 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_SBYTE_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].i16 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_SBYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].i16 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_SHORT_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].b16 = MEM[addr].b16
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_LOAD_SHORT_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].b16 = MEM[addr].b16
|
|
return {'VDATA': VDATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_SWAP(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = DATA.b32
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_CMPSWAP(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA[31 : 0].u32
|
|
cmp = DATA[63 : 32].u32
|
|
MEM[addr].u32 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
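# FLAT_ATOMIC_CMPSWAP above packs both operands into VDATA: bits [31:0] are the value to
# store and bits [63:32] the compare value; memory is updated only on a match and the
# pre-op value is returned in RETURN_DATA.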
|
|
|
|
def _FLATOp_FLAT_ATOMIC_ADD(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 += DATA.u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_SUB(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 -= DATA.u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_SMIN(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].i32)
|
|
src = DATA.i32
|
|
MEM[addr].i32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.i32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_UMIN(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_SMAX(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].i32)
|
|
src = DATA.i32
|
|
MEM[addr].i32 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.i32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_UMAX(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_AND(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp & DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_OR(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp | DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_XOR(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp ^ DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_INC(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((0) if (tmp >= src) else (tmp + 1))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_DEC(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (((tmp == 0) or (tmp > src))) else (tmp - 1))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
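# Illustrative sketch (not part of the autogenerated tables): the INC/DEC atomics above wrap
# against the DATA operand rather than the full 32-bit range -- INC resets to 0 once the counter
# reaches DATA, and DEC reloads DATA when the counter is 0 or has already overshot it.
# Plain-int model with hypothetical names:
def _atomic_inc_sketch(old: int, limit: int) -> int:
  return 0 if old >= limit else old + 1
def _atomic_dec_sketch(old: int, limit: int) -> int:
  return limit if (old == 0 or old > limit) else old - 1
assert _atomic_inc_sketch(7, 7) == 0 and _atomic_dec_sketch(0, 7) == 7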
|
|
|
|
def _FLATOp_FLAT_ATOMIC_ADD_F32(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR].f32)
|
|
MEM[ADDR].f32 += DATA.f32
|
|
RETURN_DATA = tmp
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_PK_ADD_F16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
  DATA = VDATA
  # --- compiled pseudocode ---
  tmp = Reg(MEM[ADDR])
  src = DATA
  dst = Reg(0)  # destination register for the packed result
  dst[31 : 16].f16 = tmp[31 : 16].f16 + src[31 : 16].f16
  dst[15 : 0].f16 = tmp[15 : 0].f16 + src[15 : 0].f16
  MEM[ADDR] = dst.b32
  RETURN_DATA = tmp
  return {}
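# Illustrative sketch (not part of the autogenerated tables): a standard-library model of the
# packed half-precision add performed by FLAT_ATOMIC_PK_ADD_F16, treating a 32-bit word as two
# IEEE f16 lanes. The helper name is hypothetical; struct's 'e' format (binary16) does the
# conversions.
import struct
def _pk_add_f16_sketch(a: int, b: int) -> int:
  a_lo, a_hi = struct.unpack('<2e', struct.pack('<I', a))
  b_lo, b_hi = struct.unpack('<2e', struct.pack('<I', b))
  return struct.unpack('<I', struct.pack('<2e', a_lo + b_lo, a_hi + b_hi))[0]
assert _pk_add_f16_sketch(0x34003E00, 0x34003E00) == 0x38004200  # (0.25, 1.5) + (0.25, 1.5) -> (0.5, 3.0)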
|
|
|
|
def _FLATOp_FLAT_ATOMIC_ADD_F64(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR].f64)
|
|
MEM[ADDR].f64 += DATA.f64
|
|
RETURN_DATA = tmp
|
|
return {}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_MIN_F64(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA.f64
|
|
MEM[addr].f64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_MAX_F64(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA.f64
|
|
MEM[addr].f64 = ((src) if (src > tmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_PK_ADD_BF16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
  DATA = VDATA
  # --- compiled pseudocode ---
  tmp = Reg(MEM[ADDR])
  src = DATA
  dst = Reg(0)  # destination register for the packed result
  dst[31 : 16].bf16 = tmp[31 : 16].bf16 + src[31 : 16].bf16
  dst[15 : 0].bf16 = tmp[15 : 0].bf16 + src[15 : 0].bf16
  MEM[ADDR] = dst.b32
  RETURN_DATA = tmp
  return {}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_SWAP_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = DATA.b64
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_CMPSWAP_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA[63 : 0].u64
|
|
cmp = DATA[127 : 64].u64
|
|
MEM[addr].u64 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_ADD_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 += DATA.u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_SUB_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 -= DATA.u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_SMIN_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].i64)
|
|
src = DATA.i64
|
|
MEM[addr].i64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.i64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_UMIN_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_SMAX_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].i64)
|
|
src = DATA.i64
|
|
MEM[addr].i64 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.i64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_UMAX_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_AND_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp & DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_OR_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp | DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_XOR_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp ^ DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_INC_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((0) if (tmp >= src) else (tmp + 1))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _FLATOp_FLAT_ATOMIC_DEC_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcFlatAddr(ADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (((tmp == 0) or (tmp > src))) else (tmp - 1))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
FLATOp_FUNCTIONS = {
|
|
FLATOp.FLAT_LOAD_UBYTE: _FLATOp_FLAT_LOAD_UBYTE,
|
|
FLATOp.FLAT_LOAD_SBYTE: _FLATOp_FLAT_LOAD_SBYTE,
|
|
FLATOp.FLAT_LOAD_USHORT: _FLATOp_FLAT_LOAD_USHORT,
|
|
FLATOp.FLAT_LOAD_SSHORT: _FLATOp_FLAT_LOAD_SSHORT,
|
|
FLATOp.FLAT_LOAD_DWORD: _FLATOp_FLAT_LOAD_DWORD,
|
|
FLATOp.FLAT_LOAD_DWORDX2: _FLATOp_FLAT_LOAD_DWORDX2,
|
|
FLATOp.FLAT_LOAD_DWORDX3: _FLATOp_FLAT_LOAD_DWORDX3,
|
|
FLATOp.FLAT_LOAD_DWORDX4: _FLATOp_FLAT_LOAD_DWORDX4,
|
|
FLATOp.FLAT_STORE_BYTE: _FLATOp_FLAT_STORE_BYTE,
|
|
FLATOp.FLAT_STORE_BYTE_D16_HI: _FLATOp_FLAT_STORE_BYTE_D16_HI,
|
|
FLATOp.FLAT_STORE_SHORT: _FLATOp_FLAT_STORE_SHORT,
|
|
FLATOp.FLAT_STORE_SHORT_D16_HI: _FLATOp_FLAT_STORE_SHORT_D16_HI,
|
|
FLATOp.FLAT_STORE_DWORD: _FLATOp_FLAT_STORE_DWORD,
|
|
FLATOp.FLAT_STORE_DWORDX2: _FLATOp_FLAT_STORE_DWORDX2,
|
|
FLATOp.FLAT_STORE_DWORDX3: _FLATOp_FLAT_STORE_DWORDX3,
|
|
FLATOp.FLAT_STORE_DWORDX4: _FLATOp_FLAT_STORE_DWORDX4,
|
|
FLATOp.FLAT_LOAD_UBYTE_D16: _FLATOp_FLAT_LOAD_UBYTE_D16,
|
|
FLATOp.FLAT_LOAD_UBYTE_D16_HI: _FLATOp_FLAT_LOAD_UBYTE_D16_HI,
|
|
FLATOp.FLAT_LOAD_SBYTE_D16: _FLATOp_FLAT_LOAD_SBYTE_D16,
|
|
FLATOp.FLAT_LOAD_SBYTE_D16_HI: _FLATOp_FLAT_LOAD_SBYTE_D16_HI,
|
|
FLATOp.FLAT_LOAD_SHORT_D16: _FLATOp_FLAT_LOAD_SHORT_D16,
|
|
FLATOp.FLAT_LOAD_SHORT_D16_HI: _FLATOp_FLAT_LOAD_SHORT_D16_HI,
|
|
FLATOp.FLAT_ATOMIC_SWAP: _FLATOp_FLAT_ATOMIC_SWAP,
|
|
FLATOp.FLAT_ATOMIC_CMPSWAP: _FLATOp_FLAT_ATOMIC_CMPSWAP,
|
|
FLATOp.FLAT_ATOMIC_ADD: _FLATOp_FLAT_ATOMIC_ADD,
|
|
FLATOp.FLAT_ATOMIC_SUB: _FLATOp_FLAT_ATOMIC_SUB,
|
|
FLATOp.FLAT_ATOMIC_SMIN: _FLATOp_FLAT_ATOMIC_SMIN,
|
|
FLATOp.FLAT_ATOMIC_UMIN: _FLATOp_FLAT_ATOMIC_UMIN,
|
|
FLATOp.FLAT_ATOMIC_SMAX: _FLATOp_FLAT_ATOMIC_SMAX,
|
|
FLATOp.FLAT_ATOMIC_UMAX: _FLATOp_FLAT_ATOMIC_UMAX,
|
|
FLATOp.FLAT_ATOMIC_AND: _FLATOp_FLAT_ATOMIC_AND,
|
|
FLATOp.FLAT_ATOMIC_OR: _FLATOp_FLAT_ATOMIC_OR,
|
|
FLATOp.FLAT_ATOMIC_XOR: _FLATOp_FLAT_ATOMIC_XOR,
|
|
FLATOp.FLAT_ATOMIC_INC: _FLATOp_FLAT_ATOMIC_INC,
|
|
FLATOp.FLAT_ATOMIC_DEC: _FLATOp_FLAT_ATOMIC_DEC,
|
|
FLATOp.FLAT_ATOMIC_ADD_F32: _FLATOp_FLAT_ATOMIC_ADD_F32,
|
|
FLATOp.FLAT_ATOMIC_PK_ADD_F16: _FLATOp_FLAT_ATOMIC_PK_ADD_F16,
|
|
FLATOp.FLAT_ATOMIC_ADD_F64: _FLATOp_FLAT_ATOMIC_ADD_F64,
|
|
FLATOp.FLAT_ATOMIC_MIN_F64: _FLATOp_FLAT_ATOMIC_MIN_F64,
|
|
FLATOp.FLAT_ATOMIC_MAX_F64: _FLATOp_FLAT_ATOMIC_MAX_F64,
|
|
FLATOp.FLAT_ATOMIC_PK_ADD_BF16: _FLATOp_FLAT_ATOMIC_PK_ADD_BF16,
|
|
FLATOp.FLAT_ATOMIC_SWAP_X2: _FLATOp_FLAT_ATOMIC_SWAP_X2,
|
|
FLATOp.FLAT_ATOMIC_CMPSWAP_X2: _FLATOp_FLAT_ATOMIC_CMPSWAP_X2,
|
|
FLATOp.FLAT_ATOMIC_ADD_X2: _FLATOp_FLAT_ATOMIC_ADD_X2,
|
|
FLATOp.FLAT_ATOMIC_SUB_X2: _FLATOp_FLAT_ATOMIC_SUB_X2,
|
|
FLATOp.FLAT_ATOMIC_SMIN_X2: _FLATOp_FLAT_ATOMIC_SMIN_X2,
|
|
FLATOp.FLAT_ATOMIC_UMIN_X2: _FLATOp_FLAT_ATOMIC_UMIN_X2,
|
|
FLATOp.FLAT_ATOMIC_SMAX_X2: _FLATOp_FLAT_ATOMIC_SMAX_X2,
|
|
FLATOp.FLAT_ATOMIC_UMAX_X2: _FLATOp_FLAT_ATOMIC_UMAX_X2,
|
|
FLATOp.FLAT_ATOMIC_AND_X2: _FLATOp_FLAT_ATOMIC_AND_X2,
|
|
FLATOp.FLAT_ATOMIC_OR_X2: _FLATOp_FLAT_ATOMIC_OR_X2,
|
|
FLATOp.FLAT_ATOMIC_XOR_X2: _FLATOp_FLAT_ATOMIC_XOR_X2,
|
|
FLATOp.FLAT_ATOMIC_INC_X2: _FLATOp_FLAT_ATOMIC_INC_X2,
|
|
FLATOp.FLAT_ATOMIC_DEC_X2: _FLATOp_FLAT_ATOMIC_DEC_X2,
|
|
}
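# Illustrative sketch (not part of the autogenerated tables): FLATOp_FUNCTIONS maps each FLATOp
# enum value to its compiled pseudocode handler above, so an emulator can dispatch on the decoded
# opcode and merge the dict of written registers that the handler returns. Only the lookup is
# shown; the MEM/ADDR/VDATA/... operands are Reg-like objects supplied by the real emulator.
_flat_handler_sketch = FLATOp_FUNCTIONS[FLATOp.FLAT_LOAD_DWORD]
assert callable(_flat_handler_sketch)
# result = _flat_handler_sketch(MEM, ADDR, VDATA, VDST, RETURN_DATA)  # would return {'VDATA': ...}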
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_UBYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA.u32 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_SBYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA.i32 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
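# Illustrative sketch (not part of the autogenerated tables): GLOBAL_LOAD_UBYTE zero-extends the
# loaded byte into the 32-bit destination (the _pack(0, ...) above), while GLOBAL_LOAD_SBYTE
# sign-extends it (the signext(...) above). Plain-int model with hypothetical helper names:
def _zext8_sketch(b: int) -> int:
  return b & 0xff
def _sext8_sketch(b: int) -> int:
  return (b & 0xff) - 0x100 if (b & 0x80) else (b & 0xff)
assert _zext8_sketch(0xf0) == 0x000000f0 and _sext8_sketch(0xf0) == -16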
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_USHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA.u32 = (_pack(0, MEM[addr].u16))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_SSHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA.i32 = (signext(MEM[addr].i16))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_DWORD(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_DWORDX2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_DWORDX3(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
VDATA[95 : 64] = MEM[addr + 8].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_DWORDX4(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
VDATA[95 : 64] = MEM[addr + 8].b32
|
|
VDATA[127 : 96] = MEM[addr + 12].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_STORE_BYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b8 = VDATA[7 : 0]
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_STORE_BYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b8 = VDATA[23 : 16]
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_STORE_SHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b16 = VDATA[15 : 0]
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_STORE_SHORT_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b16 = VDATA[31 : 16]
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_STORE_DWORD(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_STORE_DWORDX2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_STORE_DWORDX3(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
MEM[addr + 8].b32 = VDATA[95 : 64]
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_STORE_DWORDX4(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
MEM[addr + 8].b32 = VDATA[95 : 64]
|
|
MEM[addr + 12].b32 = VDATA[127 : 96]
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_UBYTE_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].u16 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_UBYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].u16 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_SBYTE_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].i16 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_SBYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].i16 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_SHORT_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].b16 = MEM[addr].b16
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_LOAD_SHORT_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].b16 = MEM[addr].b16
|
|
return {'VDATA': VDATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_SWAP(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = DATA.b32
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_CMPSWAP(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA[31 : 0].u32
|
|
cmp = DATA[63 : 32].u32
|
|
MEM[addr].u32 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_ADD(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 += DATA.u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_SUB(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
MEM[addr].u32 -= DATA.u32
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_SMIN(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].i32)
|
|
src = DATA.i32
|
|
MEM[addr].i32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.i32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_UMIN(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_SMAX(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].i32)
|
|
src = DATA.i32
|
|
MEM[addr].i32 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.i32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_UMAX(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_AND(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp & DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_OR(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp | DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_XOR(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b32)
|
|
MEM[addr].b32 = (tmp ^ DATA.b32)
|
|
RETURN_DATA.b32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_INC(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((0) if (tmp >= src) else (tmp + 1))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_DEC(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u32)
|
|
src = DATA.u32
|
|
MEM[addr].u32 = ((src) if (((tmp == 0) or (tmp > src))) else (tmp - 1))
|
|
RETURN_DATA.u32 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_ADD_F32(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR].f32)
|
|
MEM[ADDR].f32 += DATA.f32
|
|
RETURN_DATA = tmp
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_PK_ADD_F16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
  DATA = VDATA
  # --- compiled pseudocode ---
  tmp = Reg(MEM[ADDR])
  src = DATA
  dst = Reg(0)  # destination register for the packed result
  dst[31 : 16].f16 = tmp[31 : 16].f16 + src[31 : 16].f16
  dst[15 : 0].f16 = tmp[15 : 0].f16 + src[15 : 0].f16
  MEM[ADDR] = dst.b32
  RETURN_DATA = tmp
  return {}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_ADD_F64(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
tmp = Reg(MEM[ADDR].f64)
|
|
MEM[ADDR].f64 += DATA.f64
|
|
RETURN_DATA = tmp
|
|
return {}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_MIN_F64(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA.f64
|
|
MEM[addr].f64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_MAX_F64(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].f64)
|
|
src = DATA.f64
|
|
MEM[addr].f64 = ((src) if (src > tmp) else (tmp))
|
|
RETURN_DATA.f64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_PK_ADD_BF16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
  DATA = VDATA
  # --- compiled pseudocode ---
  tmp = Reg(MEM[ADDR])
  src = DATA
  dst = Reg(0)  # destination register for the packed result
  dst[31 : 16].bf16 = tmp[31 : 16].bf16 + src[31 : 16].bf16
  dst[15 : 0].bf16 = tmp[15 : 0].bf16 + src[15 : 0].bf16
  MEM[ADDR] = dst.b32
  RETURN_DATA = tmp
  return {}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_SWAP_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = DATA.b64
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_CMPSWAP_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA[63 : 0].u64
|
|
cmp = DATA[127 : 64].u64
|
|
MEM[addr].u64 = ((src) if (tmp == cmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_ADD_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 += DATA.u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_SUB_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
MEM[addr].u64 -= DATA.u64
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_SMIN_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].i64)
|
|
src = DATA.i64
|
|
MEM[addr].i64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.i64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_UMIN_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (src < tmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_SMAX_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].i64)
|
|
src = DATA.i64
|
|
MEM[addr].i64 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.i64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_UMAX_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (src >= tmp) else (tmp))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_AND_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp & DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_OR_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp | DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_XOR_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].b64)
|
|
MEM[addr].b64 = (tmp ^ DATA.b64)
|
|
RETURN_DATA.b64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_INC_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((0) if (tmp >= src) else (tmp + 1))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
def _GLOBALOp_GLOBAL_ATOMIC_DEC_X2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcGlobalAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
tmp = Reg(MEM[addr].u64)
|
|
src = DATA.u64
|
|
MEM[addr].u64 = ((src) if (((tmp == 0) or (tmp > src))) else (tmp - 1))
|
|
RETURN_DATA.u64 = tmp
|
|
return {'RETURN_DATA': RETURN_DATA}
|
|
|
|
GLOBALOp_FUNCTIONS = {
|
|
GLOBALOp.GLOBAL_LOAD_UBYTE: _GLOBALOp_GLOBAL_LOAD_UBYTE,
|
|
GLOBALOp.GLOBAL_LOAD_SBYTE: _GLOBALOp_GLOBAL_LOAD_SBYTE,
|
|
GLOBALOp.GLOBAL_LOAD_USHORT: _GLOBALOp_GLOBAL_LOAD_USHORT,
|
|
GLOBALOp.GLOBAL_LOAD_SSHORT: _GLOBALOp_GLOBAL_LOAD_SSHORT,
|
|
GLOBALOp.GLOBAL_LOAD_DWORD: _GLOBALOp_GLOBAL_LOAD_DWORD,
|
|
GLOBALOp.GLOBAL_LOAD_DWORDX2: _GLOBALOp_GLOBAL_LOAD_DWORDX2,
|
|
GLOBALOp.GLOBAL_LOAD_DWORDX3: _GLOBALOp_GLOBAL_LOAD_DWORDX3,
|
|
GLOBALOp.GLOBAL_LOAD_DWORDX4: _GLOBALOp_GLOBAL_LOAD_DWORDX4,
|
|
GLOBALOp.GLOBAL_STORE_BYTE: _GLOBALOp_GLOBAL_STORE_BYTE,
|
|
GLOBALOp.GLOBAL_STORE_BYTE_D16_HI: _GLOBALOp_GLOBAL_STORE_BYTE_D16_HI,
|
|
GLOBALOp.GLOBAL_STORE_SHORT: _GLOBALOp_GLOBAL_STORE_SHORT,
|
|
GLOBALOp.GLOBAL_STORE_SHORT_D16_HI: _GLOBALOp_GLOBAL_STORE_SHORT_D16_HI,
|
|
GLOBALOp.GLOBAL_STORE_DWORD: _GLOBALOp_GLOBAL_STORE_DWORD,
|
|
GLOBALOp.GLOBAL_STORE_DWORDX2: _GLOBALOp_GLOBAL_STORE_DWORDX2,
|
|
GLOBALOp.GLOBAL_STORE_DWORDX3: _GLOBALOp_GLOBAL_STORE_DWORDX3,
|
|
GLOBALOp.GLOBAL_STORE_DWORDX4: _GLOBALOp_GLOBAL_STORE_DWORDX4,
|
|
GLOBALOp.GLOBAL_LOAD_UBYTE_D16: _GLOBALOp_GLOBAL_LOAD_UBYTE_D16,
|
|
GLOBALOp.GLOBAL_LOAD_UBYTE_D16_HI: _GLOBALOp_GLOBAL_LOAD_UBYTE_D16_HI,
|
|
GLOBALOp.GLOBAL_LOAD_SBYTE_D16: _GLOBALOp_GLOBAL_LOAD_SBYTE_D16,
|
|
GLOBALOp.GLOBAL_LOAD_SBYTE_D16_HI: _GLOBALOp_GLOBAL_LOAD_SBYTE_D16_HI,
|
|
GLOBALOp.GLOBAL_LOAD_SHORT_D16: _GLOBALOp_GLOBAL_LOAD_SHORT_D16,
|
|
GLOBALOp.GLOBAL_LOAD_SHORT_D16_HI: _GLOBALOp_GLOBAL_LOAD_SHORT_D16_HI,
|
|
GLOBALOp.GLOBAL_ATOMIC_SWAP: _GLOBALOp_GLOBAL_ATOMIC_SWAP,
|
|
GLOBALOp.GLOBAL_ATOMIC_CMPSWAP: _GLOBALOp_GLOBAL_ATOMIC_CMPSWAP,
|
|
GLOBALOp.GLOBAL_ATOMIC_ADD: _GLOBALOp_GLOBAL_ATOMIC_ADD,
|
|
GLOBALOp.GLOBAL_ATOMIC_SUB: _GLOBALOp_GLOBAL_ATOMIC_SUB,
|
|
GLOBALOp.GLOBAL_ATOMIC_SMIN: _GLOBALOp_GLOBAL_ATOMIC_SMIN,
|
|
GLOBALOp.GLOBAL_ATOMIC_UMIN: _GLOBALOp_GLOBAL_ATOMIC_UMIN,
|
|
GLOBALOp.GLOBAL_ATOMIC_SMAX: _GLOBALOp_GLOBAL_ATOMIC_SMAX,
|
|
GLOBALOp.GLOBAL_ATOMIC_UMAX: _GLOBALOp_GLOBAL_ATOMIC_UMAX,
|
|
GLOBALOp.GLOBAL_ATOMIC_AND: _GLOBALOp_GLOBAL_ATOMIC_AND,
|
|
GLOBALOp.GLOBAL_ATOMIC_OR: _GLOBALOp_GLOBAL_ATOMIC_OR,
|
|
GLOBALOp.GLOBAL_ATOMIC_XOR: _GLOBALOp_GLOBAL_ATOMIC_XOR,
|
|
GLOBALOp.GLOBAL_ATOMIC_INC: _GLOBALOp_GLOBAL_ATOMIC_INC,
|
|
GLOBALOp.GLOBAL_ATOMIC_DEC: _GLOBALOp_GLOBAL_ATOMIC_DEC,
|
|
GLOBALOp.GLOBAL_ATOMIC_ADD_F32: _GLOBALOp_GLOBAL_ATOMIC_ADD_F32,
|
|
GLOBALOp.GLOBAL_ATOMIC_PK_ADD_F16: _GLOBALOp_GLOBAL_ATOMIC_PK_ADD_F16,
|
|
GLOBALOp.GLOBAL_ATOMIC_ADD_F64: _GLOBALOp_GLOBAL_ATOMIC_ADD_F64,
|
|
GLOBALOp.GLOBAL_ATOMIC_MIN_F64: _GLOBALOp_GLOBAL_ATOMIC_MIN_F64,
|
|
GLOBALOp.GLOBAL_ATOMIC_MAX_F64: _GLOBALOp_GLOBAL_ATOMIC_MAX_F64,
|
|
GLOBALOp.GLOBAL_ATOMIC_PK_ADD_BF16: _GLOBALOp_GLOBAL_ATOMIC_PK_ADD_BF16,
|
|
GLOBALOp.GLOBAL_ATOMIC_SWAP_X2: _GLOBALOp_GLOBAL_ATOMIC_SWAP_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_CMPSWAP_X2: _GLOBALOp_GLOBAL_ATOMIC_CMPSWAP_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_ADD_X2: _GLOBALOp_GLOBAL_ATOMIC_ADD_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_SUB_X2: _GLOBALOp_GLOBAL_ATOMIC_SUB_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_SMIN_X2: _GLOBALOp_GLOBAL_ATOMIC_SMIN_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_UMIN_X2: _GLOBALOp_GLOBAL_ATOMIC_UMIN_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_SMAX_X2: _GLOBALOp_GLOBAL_ATOMIC_SMAX_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_UMAX_X2: _GLOBALOp_GLOBAL_ATOMIC_UMAX_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_AND_X2: _GLOBALOp_GLOBAL_ATOMIC_AND_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_OR_X2: _GLOBALOp_GLOBAL_ATOMIC_OR_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_XOR_X2: _GLOBALOp_GLOBAL_ATOMIC_XOR_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_INC_X2: _GLOBALOp_GLOBAL_ATOMIC_INC_X2,
|
|
GLOBALOp.GLOBAL_ATOMIC_DEC_X2: _GLOBALOp_GLOBAL_ATOMIC_DEC_X2,
|
|
}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_UBYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA.u32 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_SBYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA.i32 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_USHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA.u32 = (_pack(0, MEM[addr].u16))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_SSHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA.i32 = (signext(MEM[addr].i16))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_DWORD(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_DWORDX2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_DWORDX3(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
VDATA[95 : 64] = MEM[addr + 8].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_DWORDX4(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 0] = MEM[addr].b32
|
|
VDATA[63 : 32] = MEM[addr + 4].b32
|
|
VDATA[95 : 64] = MEM[addr + 8].b32
|
|
VDATA[127 : 96] = MEM[addr + 12].b32
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_STORE_BYTE(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b8 = VDATA[7 : 0]
|
|
return {}
|
|
|
|
def _SCRATCHOp_SCRATCH_STORE_BYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b8 = VDATA[23 : 16]
|
|
return {}
|
|
|
|
def _SCRATCHOp_SCRATCH_STORE_SHORT(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b16 = VDATA[15 : 0]
|
|
return {}
|
|
|
|
def _SCRATCHOp_SCRATCH_STORE_SHORT_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b16 = VDATA[31 : 16]
|
|
return {}
|
|
|
|
def _SCRATCHOp_SCRATCH_STORE_DWORD(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
return {}
|
|
|
|
def _SCRATCHOp_SCRATCH_STORE_DWORDX2(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
return {}
|
|
|
|
def _SCRATCHOp_SCRATCH_STORE_DWORDX3(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
MEM[addr + 8].b32 = VDATA[95 : 64]
|
|
return {}
|
|
|
|
def _SCRATCHOp_SCRATCH_STORE_DWORDX4(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
MEM[addr].b32 = VDATA[31 : 0]
|
|
MEM[addr + 4].b32 = VDATA[63 : 32]
|
|
MEM[addr + 8].b32 = VDATA[95 : 64]
|
|
MEM[addr + 12].b32 = VDATA[127 : 96]
|
|
return {}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_UBYTE_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].u16 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_UBYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].u16 = (_pack(0, MEM[addr].u8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_SBYTE_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].i16 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_SBYTE_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].i16 = (signext(MEM[addr].i8))
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_SHORT_D16(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[15 : 0].b16 = MEM[addr].b16
|
|
return {'VDATA': VDATA}
|
|
|
|
def _SCRATCHOp_SCRATCH_LOAD_SHORT_D16_HI(MEM, ADDR, VDATA, VDST, RETURN_DATA):
|
|
DATA = VDATA
|
|
# --- compiled pseudocode ---
|
|
addr = CalcScratchAddr(ADDR.b32, SADDR.b32, OFFSET.b32)
|
|
VDATA[31 : 16].b16 = MEM[addr].b16
|
|
return {'VDATA': VDATA}
|
|
|
|
SCRATCHOp_FUNCTIONS = {
|
|
SCRATCHOp.SCRATCH_LOAD_UBYTE: _SCRATCHOp_SCRATCH_LOAD_UBYTE,
|
|
SCRATCHOp.SCRATCH_LOAD_SBYTE: _SCRATCHOp_SCRATCH_LOAD_SBYTE,
|
|
SCRATCHOp.SCRATCH_LOAD_USHORT: _SCRATCHOp_SCRATCH_LOAD_USHORT,
|
|
SCRATCHOp.SCRATCH_LOAD_SSHORT: _SCRATCHOp_SCRATCH_LOAD_SSHORT,
|
|
SCRATCHOp.SCRATCH_LOAD_DWORD: _SCRATCHOp_SCRATCH_LOAD_DWORD,
|
|
SCRATCHOp.SCRATCH_LOAD_DWORDX2: _SCRATCHOp_SCRATCH_LOAD_DWORDX2,
|
|
SCRATCHOp.SCRATCH_LOAD_DWORDX3: _SCRATCHOp_SCRATCH_LOAD_DWORDX3,
|
|
SCRATCHOp.SCRATCH_LOAD_DWORDX4: _SCRATCHOp_SCRATCH_LOAD_DWORDX4,
|
|
SCRATCHOp.SCRATCH_STORE_BYTE: _SCRATCHOp_SCRATCH_STORE_BYTE,
|
|
SCRATCHOp.SCRATCH_STORE_BYTE_D16_HI: _SCRATCHOp_SCRATCH_STORE_BYTE_D16_HI,
|
|
SCRATCHOp.SCRATCH_STORE_SHORT: _SCRATCHOp_SCRATCH_STORE_SHORT,
|
|
SCRATCHOp.SCRATCH_STORE_SHORT_D16_HI: _SCRATCHOp_SCRATCH_STORE_SHORT_D16_HI,
|
|
SCRATCHOp.SCRATCH_STORE_DWORD: _SCRATCHOp_SCRATCH_STORE_DWORD,
|
|
SCRATCHOp.SCRATCH_STORE_DWORDX2: _SCRATCHOp_SCRATCH_STORE_DWORDX2,
|
|
SCRATCHOp.SCRATCH_STORE_DWORDX3: _SCRATCHOp_SCRATCH_STORE_DWORDX3,
|
|
SCRATCHOp.SCRATCH_STORE_DWORDX4: _SCRATCHOp_SCRATCH_STORE_DWORDX4,
|
|
SCRATCHOp.SCRATCH_LOAD_UBYTE_D16: _SCRATCHOp_SCRATCH_LOAD_UBYTE_D16,
|
|
SCRATCHOp.SCRATCH_LOAD_UBYTE_D16_HI: _SCRATCHOp_SCRATCH_LOAD_UBYTE_D16_HI,
|
|
SCRATCHOp.SCRATCH_LOAD_SBYTE_D16: _SCRATCHOp_SCRATCH_LOAD_SBYTE_D16,
|
|
SCRATCHOp.SCRATCH_LOAD_SBYTE_D16_HI: _SCRATCHOp_SCRATCH_LOAD_SBYTE_D16_HI,
|
|
SCRATCHOp.SCRATCH_LOAD_SHORT_D16: _SCRATCHOp_SCRATCH_LOAD_SHORT_D16,
|
|
SCRATCHOp.SCRATCH_LOAD_SHORT_D16_HI: _SCRATCHOp_SCRATCH_LOAD_SHORT_D16_HI,
|
|
}
|
|
|
|
COMPILED_FUNCTIONS = {
|
|
SOP1Op: SOP1Op_FUNCTIONS,
|
|
SOP2Op: SOP2Op_FUNCTIONS,
|
|
SOPCOp: SOPCOp_FUNCTIONS,
|
|
SOPKOp: SOPKOp_FUNCTIONS,
|
|
SOPPOp: SOPPOp_FUNCTIONS,
|
|
VOP1Op: VOP1Op_FUNCTIONS,
|
|
VOP2Op: VOP2Op_FUNCTIONS,
|
|
VOP3POp: VOP3POp_FUNCTIONS,
|
|
VOPCOp: VOPCOp_FUNCTIONS,
|
|
VOP3AOp: VOP3AOp_FUNCTIONS,
|
|
VOP3BOp: VOP3BOp_FUNCTIONS,
|
|
DSOp: DSOp_FUNCTIONS,
|
|
FLATOp: FLATOp_FUNCTIONS,
|
|
GLOBALOp: GLOBALOp_FUNCTIONS,
|
|
SCRATCHOp: SCRATCHOp_FUNCTIONS,
|
|
}
|
|
|
|
def get_compiled_functions(): return COMPILED_FUNCTIONS |
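# Illustrative sketch (not part of the autogenerated tables): a consumer fetches the complete
# opcode->handler mapping with get_compiled_functions() and indexes it first by the instruction
# format's enum class, then by the specific opcode. The operand registers themselves come from
# the emulator and are not constructed here.
_tables_sketch = get_compiled_functions()
_global_load_sketch = _tables_sketch[GLOBALOp][GLOBALOp.GLOBAL_LOAD_DWORD]
assert _global_load_sketch is _GLOBALOp_GLOBAL_LOAD_DWORD  # the table holds the functions defined above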