diff --git a/extra/assembly/amd/autogen/cdna/gen_pcode.py b/extra/assembly/amd/autogen/cdna/gen_pcode.py index 43e279b68e..d6d79c1a84 100644 --- a/extra/assembly/amd/autogen/cdna/gen_pcode.py +++ b/extra/assembly/amd/autogen/cdna/gen_pcode.py @@ -5,891 +5,309 @@ from extra.assembly.amd.autogen.cdna.enum import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3POp, VOPCOp, VOP3AOp, VOP3BOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b32 = S0.b32 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b64 = S0.b64 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~S0.u64; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~S0.u64 SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # declare i : 6'U; - # for i in 6'0U : 6'31U do - # tmp[i] = S0.u32[i & 6'60U +: 6'4U] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp[i] = S0.u32[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # declare i : 6'U; - # for i in 6'0U : 6'63U do - # tmp[i] = S0.u64[i & 6'60U +: 6'4U] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp[i] = S0.u64[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[63 : 0] = S0.u64[0 : 63] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[63 : 0] = S0.u64[0 : 63] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no zeros are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'0U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FF0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 0: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no zeros are found - # for i in 0 : 63 do - # // Search from LSB - # if S0.u64[i] == 1'0U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FF0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 0: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FF1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from LSB - # if S0.u64[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FF1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLBIT_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from MSB - # if S0.u64[63 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLBIT_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[63 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.u32[31 - i] != S0.u32[31] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLBIT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 63 do - # // Search from MSB - # if S0.u64[63 - i] != S0.u64[63] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLBIT_I32_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != S0.u64[63]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i8)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i8)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_GETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = PC + 4 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # jump_addr = S0.i64; - # D0.i64 = PC + 4LL; - # PC = jump_addr.i64 - S0 = Reg(s0) - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SWAPPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): jump_addr = S0.i64 D0.i64 = PC + 4 PC = Reg(jump_addr.i64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} -def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_RFE_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ANDN2_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ORN2_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # for i in 0 : 7 do - # tmp[i] = S0.u32[i * 4 +: 4] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(7)+1): tmp[i] = S0.u32[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # for i in 0 : 15 do - # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(15)+1): tmp[i] = S0.u64[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_ABS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # M0[7 : 0] = S0.u32[7 : 0].b8 - S0 = Reg(s0) - # --- compiled pseudocode --- +def _SOP1Op_S_SET_GPR_IDX_IDX(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): M0[7 : 0] = S0.u32[7 : 0].b8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ANDN1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ORN1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ANDN1_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ANDN2_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32; - # for i in 0 : 31 do - # D0.u64[i * 2] = tmp[i]; - # D0.u64[i * 2 + 1] = tmp[i] - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITREPLICATE_B64_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32) for i in range(0, int(31)+1): D0.u64[i * 2] = tmp[i] D0.u64[i * 2 + 1] = tmp[i] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} SOP1Op_FUNCTIONS = { SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, @@ -943,747 +361,268 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_BITREPLICATE_B64_B32: _SOP1Op_S_BITREPLICATE_B64_B32, } -def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 + S1.i32; - # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 + S1.i32) SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 - S1.i32; - # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 - S1.i32) SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADDC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - SCC.u32; - # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUBB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - SCC.u32) SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = SCC ? S0.u64 : S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ANDN2_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ANDN2_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ORN2_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ORN2_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 << S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 << S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 >> S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 >> S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 >> S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 >> S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 * S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 * S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); - # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32; - # if D0.i32 < 0 then - # D0.i32 = -D0.i32 - # endif; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ABSDIFF_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 if D0.i32 < 0: D0.i32 = -D0.i32 SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 1) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 2) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 3) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 4) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} SOP2Op_FUNCTIONS = { SOP2Op.S_ADD_U32: _SOP2Op_S_ADD_U32, @@ -1739,230 +678,77 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 == S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 != S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 > S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 <= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 == S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 != S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 > S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 <= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VSKIP = S0.u32[S1.u32[4 : 0]] - S0 = Reg(s0) - S1 = Reg(s1) - # --- compiled pseudocode --- - VSKIP = S0.u32[S1.u32[4 : 0]] - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # specified in the SRC0 operand. The raw bits of the SRC1 field are read and used to set the enable bits. S1[0] = - # VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and S1[3] = VDST_REL. - # M0[7 : 0] = S0.u32[7 : 0].b8; - # // this is the direct content of raw S1 field - S0 = Reg(s0) - S1 = Reg(s1) - SRC0 = Reg(src0_idx) - VDST = Reg(vdst_idx) - # --- compiled pseudocode --- - specified in the SRC0 operand. The raw bits of the SRC1 field are read and used to set the enable bits. S1[0] = - VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and S1[3] = VDST_REL. - M0[7 : 0] = S0.u32[7 : 0].b8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 == S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 != S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, @@ -1981,209 +767,83 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_BITCMP1_B32: _SOPCOp_S_BITCMP1_B32, SOPCOp.S_BITCMP0_B64: _SOPCOp_S_BITCMP0_B64, SOPCOp.S_BITCMP1_B64: _SOPCOp_S_BITCMP1_B64, - SOPCOp.S_SETVSKIP: _SOPCOp_S_SETVSKIP, - SOPCOp.S_SET_GPR_IDX_ON: _SOPCOp_S_SET_GPR_IDX_ON, SOPCOp.S_CMP_EQ_U64: _SOPCOp_S_CMP_EQ_U64, SOPCOp.S_CMP_LG_U64: _SOPCOp_S_CMP_LG_U64, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOPKOp_S_MOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.i32 = 32'I(signext(S0.i16)) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 == 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 == (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 != 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 != (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 > 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 > (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <= 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 <= (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 == (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 != 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 != (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 > (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 <= (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # D0.i32 = D0.i32 + 32'I(signext(S0.i16)); - # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31])); - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOPKOp_S_ADDK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) D0.i32 = D0.i32 + (signext(S0.i16)) SCC = Reg(((tmp[31] == S0.i16[15]) and (tmp[31] != D0.i32[31]))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = D0.i32 * 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOPKOp_S_MULK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = D0.i32 * (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL; - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - D0 = Reg(d0) +def _SOPKOp_S_CALL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- D0.i64 = PC + 4 PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, @@ -2205,257 +865,118 @@ SOPKOp_FUNCTIONS = { SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for i in 0U : SIMM16.u16[3 : 0].u32 do - # endfor +def _SOPPOp_S_NOP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): pass - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; +def _SOPPOp_S_BRANCH(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 1 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 0 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // PC passed into trap handler points to S_TRAP itself, - # PC = TBA.i64; - # // trap base address - PC = Reg(pc) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result +def _SOPPOp_S_TRAP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if WAVE_STATUS.COND_DBG_SYS.u32 != 0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if WAVE_STATUS.COND_DBG_SYS.u32 != 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGUSER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if WAVE_STATUS.COND_DBG_USER.u32 != 0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGUSER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if WAVE_STATUS.COND_DBG_USER.u32 != 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_STATUS.COND_DBG_SYS || WAVE_STATUS.COND_DBG_USER) then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER): PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_STATUS.COND_DBG_SYS && WAVE_STATUS.COND_DBG_USER) then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER): PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_SET_GPR_IDX_MODE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Get Doorbell ID 10 - Returns doorbell into EXEC, with the doorbell physical address in bits - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result +def _SOPPOp_S_SET_GPR_IDX_MODE(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, @@ -2474,28 +995,11 @@ SOPPOp_FUNCTIONS = { SOPPOp.S_SET_GPR_IDX_MODE: _SOPPOp_S_SET_GPR_IDX_MODE, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'I; - # if EXEC == 0x0LL then - # lane = 0; - # // Force lane 0 if all lanes are disabled - # else - # lane = s_ff1_i32_b64(EXEC); - # // Lowest active lane - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP1Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if EXEC == 0x0: @@ -2503,877 +1007,343 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = s_ff1_i32_b64(EXEC) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_RPI_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_FLR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FFBH_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FFBL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FFBH_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 16'0; - # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); - # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); - # D0.b16 = tmp.b16 - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) D0.b16 = tmp.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b32; - # D0.b32 = S0.b32; - # S0.b32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.b32) D0.b32 = S0.b32 S0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SDWA_SRC0_SEL == BYTE1.b3 then - # D0.f32 = fp8_to_f32(S0[15 : 8].fp8) - # elsif SDWA_SRC0_SEL == BYTE2.b3 then - # D0.f32 = fp8_to_f32(S0[23 : 16].fp8) - # elsif SDWA_SRC0_SEL == BYTE3.b3 then - # D0.f32 = fp8_to_f32(S0[31 : 24].fp8) - # else - # // BYTE0 implied - # D0.f32 = fp8_to_f32(S0[7 : 0].fp8) - # endif - S0 = Reg(s0) - D0 = Reg(d0) +def _VOP1Op_V_CVT_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if SDWA_SRC0_SEL == BYTE1.b3: @@ -3384,23 +1354,9 @@ def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = fp8_to_f32(S0[31 : 24].fp8) else: D0.f32 = fp8_to_f32(S0[7 : 0].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SDWA_SRC0_SEL == BYTE1.b3 then - # D0.f32 = bf8_to_f32(S0[15 : 8].bf8) - # elsif SDWA_SRC0_SEL == BYTE2.b3 then - # D0.f32 = bf8_to_f32(S0[23 : 16].bf8) - # elsif SDWA_SRC0_SEL == BYTE3.b3 then - # D0.f32 = bf8_to_f32(S0[31 : 24].bf8) - # else - # // BYTE0 implied - # D0.f32 = bf8_to_f32(S0[7 : 0].bf8) - # endif - S0 = Reg(s0) - D0 = Reg(d0) +def _VOP1Op_V_CVT_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if SDWA_SRC0_SEL == BYTE1.b3: @@ -3411,93 +1367,44 @@ def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = bf8_to_f32(S0[31 : 24].bf8) else: D0.f32 = bf8_to_f32(S0[7 : 0].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? S0[31 : 16] : S0[15 : 0]; - # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); - # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - S0 = Reg(s0) - D0 = Reg(d0) +def _VOP1Op_V_CVT_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) - tmp = Reg(0) SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((S0[31 : 16]) if (SDWA_SRC0_SEL[1 : 0] == WORD1.b2) else (S0[15 : 0]))) D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? S0[31 : 16] : S0[15 : 0]; - # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); - # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - S0 = Reg(s0) - D0 = Reg(d0) +def _VOP1Op_V_CVT_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) - tmp = Reg(0) SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((S0[31 : 16]) if (SDWA_SRC0_SEL[1 : 0] == WORD1.b2) else (S0[15 : 0]))) D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for pass in 0 : 1 do - # for lane in 0 : 15 do - # tmp = VGPR[pass * 32 + lane][SRC0.u32]; - # endfor - # endfor - tmp = Reg(0) +def _VOP1Op_V_PERMLANE16_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- - for pass in range(0, int(1)+1): + for pass_ in range(0, int(1)+1): for lane in range(0, int(15)+1): - tmp = Reg(VGPR[pass * 32 + lane][SRC0.u32]) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + tmp = Reg(VGPR[pass_ * 32 + lane][SRC0.u32]) + return {} -def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for lane in 0 : 31 do - # tmp = VGPR[lane][SRC0.u32]; - # endfor - tmp = Reg(0) +def _VOP1Op_V_PERMLANE32_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- for lane in range(0, int(31)+1): tmp = Reg(VGPR[lane][SRC0.u32]) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP1Op_V_CVT_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 32'F({ S0.b16, 16'0U }) - # V_CMPX_{COMPF}_F16 16-bit float compare. Also writes EXEC. 0x30 to 0x3F - # V_CMPX_{COMPF}_F32 32-bit float compare. Also writes EXEC. 0x50 to 0x5F - # V_CMPSX_{COMPF}_F64 64-bit float compare. Also writes EXEC. 0x70 to 0x7F - # V_CMPX_{COMPI}_I16 16-bit unsigned integer compare. Also writes EXEC. 0xB0 - 0xB7 - # V_CMPX_{COMPI}_U16 16-bit unsigned integer compare. Also writes EXEC. 0xB8 - 0xBF - # V_CMPX_{COMPI}_I32 32-bit unsigned integer compare. Also writes EXEC. 0xD0 - 0xD7 - # V_CMPX_{COMPI}_U32 32-bit unsigned integer compare. Also writes EXEC. 0xD8 - 0xDF - # V_CMPX_{COMPI}_I64 64-bit unsigned integer compare. Also writes EXEC. 0xF0 - 0xF7 - # V_CMPX_{COMPI}_U64 64-bit unsigned integer compare. Also writes EXEC. 0xF8 - 0xFF - S0 = Reg(s0) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = F(_pack(S0.b16, 0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} VOP1Op_FUNCTIONS = { VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, @@ -3582,140 +1489,47 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_F32_BF16: _VOP1Op_V_CVT_F32_BF16, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, D0.f64) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, D0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then - # D0.f32 = S1.f32 - # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then - # D0.f32 = S0.f32 - # else - # D0.f32 = S0.f32 < S1.f32 ? S0.f32 : S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): @@ -3730,32 +1544,9 @@ def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = ((S0.f32) if (S0.f32 < S1.f32) else (S1.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then - # D0.f32 = S1.f32 - # elsif WAVE_MODE.IEEE then - # D0.f32 = S0.f32 >= S1.f32 ? S0.f32 : S1.f32 - # else - # D0.f32 = S0.f32 > S1.f32 ? S0.f32 : S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): @@ -3772,452 +1563,161 @@ def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = ((S0.f32) if (S0.f32 >= S1.f32) else (S1.f32)) else: D0.f32 = ((S0.f32) if (S0.f32 > S1.f32) else (S1.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32; - # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32) VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_ADDC_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * S1.f16 + D0.f16; - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.f16 * S1.f16 + D0.f16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * SIMM16.f16 + S1.f16; - S0 = Reg(s0) - S1 = Reg(s1) - tmp = Reg(0) +def _VOP2Op_V_MADMK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- tmp = Reg(S0.f16 * SIMM16.f16 + S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * S1.f16 + SIMM16.f16; - S0 = Reg(s0) - S1 = Reg(s1) - tmp = Reg(0) +def _VOP2Op_V_MADAK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- tmp = Reg(S0.f16 * S1.f16 + SIMM16.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 + S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 + S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 - S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 - S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S1.u16 - S0.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S1.u16 - S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 << S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 << S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = (S1.i16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = (S1.i16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then - # D0.f16 = S1.f16 - # elsif WAVE_MODE.IEEE then - # D0.f16 = S0.f16 >= S1.f16 ? S0.f16 : S1.f16 - # else - # D0.f16 = S0.f16 > S1.f16 ? S0.f16 : S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): @@ -4234,30 +1734,9 @@ def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = ((S0.f16) if (S0.f16 >= S1.f16) else (S1.f16)) else: D0.f16 = ((S0.f16) if (S0.f16 > S1.f16) else (S1.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then - # D0.f16 = S1.f16 - # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then - # D0.f16 = S0.f16 - # else - # D0.f16 = S0.f16 < S1.f16 ? S0.f16 : S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): @@ -4272,172 +1751,64 @@ def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = ((S0.f16) if (S0.f16 < S1.f16) else (S1.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT2C_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); - # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT2C_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16) tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); - # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); - # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8); - # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT4C_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8) tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8) tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8) tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); - # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); - # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4); - # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4); - # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4); - # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4); - # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4); - # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT8C_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4) tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4) @@ -4448,62 +1819,27 @@ def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4) tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); - # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.f32; - # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); - # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT2C_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.f32) tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16) tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP2Op_FUNCTIONS = { VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32, @@ -4570,448 +1906,198 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_DOT2C_F32_BF16: _VOP2Op_V_DOT2C_F32_BF16, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32) tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); - # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; - # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; - # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.i32; - # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); - # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.i32) tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16) tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16); - # tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16) tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.i32; - # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); - # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); - # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8); - # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.i32) tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8) tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8) tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8) tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); - # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); - # tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8); - # tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8) tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8) tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8) tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.i32; - # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); - # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); - # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4); - # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4); - # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4); - # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4); - # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4); - # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT8_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.i32) tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4) tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4) @@ -5022,27 +2108,9 @@ def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4) tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); - # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); - # tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4); - # tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4); - # tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4); - # tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4); - # tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4); - # tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4) tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4) @@ -5053,135 +2121,63 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4) tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 64'B; - # tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32); - # tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32); - # D0.b64 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32) tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32) D0.b64 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 64'B; - # tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32; - # tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32; - # D0.b64 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32 tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32 D0.b64 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 64'B; - # tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32; - # tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32; - # D0.b64 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32 tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32 D0.b64 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3POp_V_PK_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32]; - # tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32]; - # D0.u32[31 : 0] = tmp0.u32; - # D0.u32[63 : 32] = tmp1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32] tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32] D0.u32[31 : 0] = tmp0.u32 D0.u32[63 : 32] = tmp1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 32'F(S0[15 : 0].bf16) * 32'F(S1[15 : 0].bf16); - # tmp += 32'F(S0[31 : 16].bf16) * 32'F(S1[31 : 16].bf16); - # tmp += S2.f32; - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(F(S0[15 : 0].bf16) * F(S1[15 : 0].bf16)) tmp += F(S0[31 : 16].bf16) * F(S1[31 : 16].bf16) tmp += S2.f32 D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = 16'F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); - # tmp[15 : 0].f16 = 16'F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MINIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)) tmp[15 : 0].f16 = F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = 16'F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); - # tmp[15 : 0].f16 = 16'F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAXIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)) tmp[15 : 0].f16 = F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, @@ -5219,45 +2215,7 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MAXIMUM3_F16: _VOP3POp_V_PK_MAXIMUM3_F16, } -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -5271,52 +2229,9 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -5330,52 +2245,9 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -5389,52 +2261,9 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -5448,52 +2277,9 @@ def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -5507,52 +2293,9 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -5566,4131 +2309,773 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - tmp = Reg(0) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) tmp = Reg(MEM[addr].u32) @@ -9698,15 +3083,7 @@ def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP tmp = Reg(MEM[addr].u32) addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) tmp = Reg(MEM[addr].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} VOPCOp_FUNCTIONS = { VOPCOp.V_CMP_CLASS_F32: _VOPCOp_V_CMP_CLASS_F32, @@ -9909,45 +3286,7 @@ VOPCOp_FUNCTIONS = { VOPCOp.V_CMPX_T_U64: _VOPCOp_V_CMPX_T_U64, } -def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -9961,51 +3300,9 @@ def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -10019,51 +3316,9 @@ def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -10077,51 +3332,9 @@ def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -10135,51 +3348,9 @@ def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -10193,51 +3364,9 @@ def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -10251,3939 +3380,773 @@ def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - tmp = Reg(0) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) tmp = Reg(MEM[addr].u32) @@ -14191,37 +4154,13 @@ def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG tmp = Reg(MEM[addr].u32) addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) tmp = Reg(MEM[addr].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'I; - # if EXEC == 0x0LL then - # lane = 0; - # // Force lane 0 if all lanes are disabled - # else - # lane = s_ff1_i32_b64(EXEC); - # // Lowest active lane - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP3AOp_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if EXEC == 0x0: @@ -14229,827 +4168,314 @@ def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite else: lane = s_ff1_i32_b64(EXEC) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_RPI_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_FLR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FFBH_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FFBL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FFBH_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, D0.f64) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMAC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, D0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then - # D0.f32 = S1.f32 - # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then - # D0.f32 = S0.f32 - # else - # D0.f32 = S0.f32 < S1.f32 ? S0.f32 : S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): @@ -15064,32 +4490,9 @@ def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = ((S0.f32) if (S0.f32 < S1.f32) else (S1.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then - # D0.f32 = S1.f32 - # elsif WAVE_MODE.IEEE then - # D0.f32 = S0.f32 >= S1.f32 ? S0.f32 : S1.f32 - # else - # D0.f32 = S0.f32 > S1.f32 ? S0.f32 : S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): @@ -15106,284 +4509,101 @@ def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = ((S0.f32) if (S0.f32 >= S1.f32) else (S1.f32)) else: D0.f32 = ((S0.f32) if (S0.f32 > S1.f32) else (S1.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * S1.f16 + D0.f16; - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.f16 * S1.f16 + D0.f16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 + S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 + S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 - S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 - S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S1.u16 - S0.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUBREV_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S1.u16 - S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 << S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 << S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = (S1.i16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = (S1.i16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then - # D0.f16 = S1.f16 - # elsif WAVE_MODE.IEEE then - # D0.f16 = S0.f16 >= S1.f16 ? S0.f16 : S1.f16 - # else - # D0.f16 = S0.f16 > S1.f16 ? S0.f16 : S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): @@ -15400,30 +4620,9 @@ def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = ((S0.f16) if (S0.f16 >= S1.f16) else (S1.f16)) else: D0.f16 = ((S0.f16) if (S0.f16 > S1.f16) else (S1.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then - # D0.f16 = S1.f16 - # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then - # D0.f16 = S0.f16 - # else - # D0.f16 = S0.f16 < S1.f16 ? S0.f16 : S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): @@ -15438,172 +4637,64 @@ def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = ((S0.f16) if (S0.f16 < S1.f16) else (S1.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUBREV_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT2C_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); - # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT2C_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16) tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); - # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); - # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8); - # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT4C_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8) tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8) tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8) tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); - # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); - # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4); - # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4); - # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4); - # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4); - # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4); - # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT8C_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4) tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4) @@ -15614,99 +4705,30 @@ def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4) tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); - # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = 5.0F - # else - # D0.f32 = 4.0F - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = 3.0F - # else - # D0.f32 = 2.0F - # endif - # else - # if S0.f32 < 0.0F then - # D0.f32 = 1.0F - # else - # D0.f32 = 0.0F - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CUBEID_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = 5.0 @@ -15722,36 +4744,9 @@ def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = 1.0 else: D0.f32 = 0.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap S coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = -S0.f32 - # else - # D0.f32 = S0.f32 - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S0.f32 - # else - # if S0.f32 < 0.0F then - # D0.f32 = S2.f32 - # else - # D0.f32 = -S2.f32 - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CUBESC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = -S0.f32 @@ -15764,32 +4759,9 @@ def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S2.f32 else: D0.f32 = -S2.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap T coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = -S1.f32 - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = -S2.f32 - # else - # D0.f32 = S2.f32 - # endif - # else - # D0.f32 = -S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CUBETC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = -S1.f32 elif abs(S1.f32) >= abs(S0.f32): @@ -15799,234 +4771,81 @@ def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S2.f32 else: D0.f32 = -S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = 2.0 * cubemap major axis. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = S2.f32 * 2.0F - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S1.f32 * 2.0F - # else - # D0.f32 = S0.f32 * 2.0F - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CUBEMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = S2.f32 * 2.0 elif abs(S1.f32) >= abs(S0.f32): D0.f32 = S1.f32 * 2.0 else: D0.f32 = S0.f32 * 2.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BFI_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMA_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); - # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); - # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); - # tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1U); - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LERP_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24)) tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16) tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8) tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ALIGNBIT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0]) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ALIGNBYTE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0] * 8)) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then - # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) - # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then - # D0.f32 = v_max_f32(S1.f32, S2.f32) - # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S1.f32 then - # D0.f32 = v_max_f32(S0.f32, S2.f32) - # else - # D0.f32 = v_max_f32(S0.f32, S1.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or isNAN(F(S2.f32))): D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) elif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32: @@ -16035,172 +4854,57 @@ def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0.f32 = v_max_f32(S0.f32, S2.f32) else: D0.f32 = v_max_f32(S0.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then - # D0.i32 = v_max_i32(S1.i32, S2.i32) - # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then - # D0.i32 = v_max_i32(S0.i32, S2.i32) - # else - # D0.i32 = v_max_i32(S0.i32, S1.i32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32: D0.i32 = v_max_i32(S1.i32, S2.i32) elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32: D0.i32 = v_max_i32(S0.i32, S2.i32) else: D0.i32 = v_max_i32(S0.i32, S1.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then - # D0.u32 = v_max_u32(S1.u32, S2.u32) - # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then - # D0.u32 = v_max_u32(S0.u32, S2.u32) - # else - # D0.u32 = v_max_u32(S0.u32, S1.u32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32: D0.u32 = v_max_u32(S1.u32, S2.u32) elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32: D0.u32 = v_max_u32(S0.u32, S2.u32) else: D0.u32 = v_max_u32(S0.u32, S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])) tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])) tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])) tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SAD_HI_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((v_sad_u8(S0, S1, 0)) << 16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); - # tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16) tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); - # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_PK_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8))))) tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f32) ^ sign(S2.f32)); - # if isNAN(64'F(S2.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif ((64'F(S1.f32) == 0.0) && (64'F(S2.f32) == 0.0)) then - # // 0/0 - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(abs(S1.f32)) == +INF) && (64'F(abs(S2.f32)) == +INF)) then - # // inf/inf - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(S1.f32) == 0.0) || (64'F(abs(S2.f32)) == +INF)) then - # // x/0, or inf/y - # D0.f32 = sign_out ? -INF.f32 : +INF.f32 - # elsif ((64'F(abs(S1.f32)) == +INF) || (64'F(S2.f32) == 0.0)) then - # // x/inf, 0/y - # D0.f32 = sign_out ? -0.0F : 0.0F - # elsif exponent(S2.f32) - exponent(S1.f32) < -150 then - # D0.f32 = sign_out ? -UNDERFLOW_F32 : UNDERFLOW_F32 - # elsif exponent(S1.f32) == 255 then - # D0.f32 = sign_out ? -OVERFLOW_F32 : OVERFLOW_F32 - # else - # D0.f32 = sign_out ? -abs(S0.f32) : abs(S0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FIXUP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f32) ^ sign(S2.f32)) if isNAN(F(S2.f32)): D0.f32 = F(cvtToQuietNAN(F(S2.f32))) @@ -16220,40 +4924,9 @@ def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) else: D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f64) ^ sign(S2.f64)); - # if isNAN(S2.f64) then - # D0.f64 = cvtToQuietNAN(S2.f64) - # elsif isNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif ((S1.f64 == 0.0) && (S2.f64 == 0.0)) then - # // 0/0 - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((abs(S1.f64) == +INF) && (abs(S2.f64) == +INF)) then - # // inf/inf - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((S1.f64 == 0.0) || (abs(S2.f64) == +INF)) then - # // x/0, or inf/y - # D0.f64 = sign_out ? -INF : +INF - # elsif ((abs(S1.f64) == +INF) || (S2.f64 == 0.0)) then - # // x/inf, 0/y - # D0.f64 = sign_out ? -0.0 : 0.0 - # elsif exponent(S2.f64) - exponent(S1.f64) < -1075 then - # D0.f64 = sign_out ? -UNDERFLOW_F64 : UNDERFLOW_F64 - # elsif exponent(S1.f64) == 2047 then - # D0.f64 = sign_out ? -OVERFLOW_F64 : OVERFLOW_F64 - # else - # D0.f64 = sign_out ? -abs(S0.f64) : abs(S0.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FIXUP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f64) ^ sign(S2.f64)) if isNAN(S2.f64): D0.f64 = cvtToQuietNAN(S2.f64) @@ -16273,90 +4946,32 @@ def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) else: D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FMAS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f32 = (2.0 ** 64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32) else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) - # else - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FMAS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64) else: D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += S1.u32[15 : 8] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += S1.u32[23 : 16] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += S1.u32[31 : 24] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MSAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])))) tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])))) tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])))) tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_QSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -16364,21 +4979,9 @@ def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal tmp[31 : 16] = (v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -16386,21 +4989,9 @@ def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera tmp[31 : 16] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); - # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); - # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); - # tmp[31 : 0] = 32'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)); - # D0.b128 = tmp.b128 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_MQSAD_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[127 : 96] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)) @@ -16408,148 +4999,48 @@ def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp[63 : 32] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)) tmp[31 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)) D0.b128 = tmp.b128 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * S1.f16 + S2.f16; - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.f16 * S1.f16 + S2.f16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u16 * S1.u16 + S2.u16; - # if OPSEL.u4[3] then - # D0 = { tmp.u16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.u16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_LEGACY_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u16 * S1.u16 + S2.u16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.u16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i16 * S1.i16 + S2.i16; - # if OPSEL.u4[3] then - # D0 = { tmp.i16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.i16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_LEGACY_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i16 * S1.i16 + S2.i16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.i16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); - # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); - # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); - # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_PERM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = fma(S0.f16, S1.f16, S2.f16); - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMA_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(fma(S0.f16, S1.f16, S2.f16)) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f16) ^ sign(S2.f16)); - # if isNAN(64'F(S2.f16)) then - # tmp = cvtToQuietNAN(64'F(S2.f16)) - # elsif isNAN(64'F(S1.f16)) then - # tmp = cvtToQuietNAN(64'F(S1.f16)) - # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then - # // 0/0 - # tmp = 16'F(0xfe00) - # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then - # // inf/inf - # tmp = 16'F(0xfe00) - # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then - # // x/0, or inf/y - # tmp = sign_out ? -INF : +INF - # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then - # // x/inf, 0/y - # tmp = sign_out ? -0.0 : 0.0 - # else - # tmp = sign_out ? -abs(S0.f16) : abs(S0.f16) - # endif; - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f16) ^ sign(S2.f16)) if isNAN(F(S2.f16)): tmp = Reg(cvtToQuietNAN(F(S2.f16))) @@ -16569,148 +5060,51 @@ def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # byte = S1.u32[1 : 0]; - # bit = byte.u32 * 8U; - # D0.u32[bit + 7U : bit] = 32'U(f32_to_u8(S0.f32)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_PKACCUM_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): byte = S1.u32[1 : 0] bit = byte.u32 * 8 - D0.u32[bit + 7U : bit] = (f32_to_u8(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + D0.u32[bit + 7 : bit] = (f32_to_u8(S0.f32)) + return {'D0': D0} -def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u16) * (S1.u16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i16) * (S1.i16) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_XAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then - # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) - # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then - # D0.f16 = v_max_f16(S1.f16, S2.f16) - # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S1.f16 then - # D0.f16 = v_max_f16(S0.f16, S2.f16) - # else - # D0.f16 = v_max_f16(S0.f16, S1.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))): D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) elif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16: @@ -16719,202 +5113,67 @@ def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0.f16 = v_max_f16(S0.f16, S2.f16) else: D0.f16 = v_max_f16(S0.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then - # D0.i16 = v_max_i16(S1.i16, S2.i16) - # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then - # D0.i16 = v_max_i16(S0.i16, S2.i16) - # else - # D0.i16 = v_max_i16(S0.i16, S1.i16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16: D0.i16 = v_max_i16(S1.i16, S2.i16) elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16: D0.i16 = v_max_i16(S0.i16, S2.i16) else: D0.i16 = v_max_i16(S0.i16, S1.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then - # D0.u16 = v_max_u16(S1.u16, S2.u16) - # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then - # D0.u16 = v_max_u16(S0.u16, S2.u16) - # else - # D0.u16 = v_max_u16(S0.u16, S1.u16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16: D0.u16 = v_max_u16(S1.u16, S2.u16) elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16: D0.u16 = v_max_u16(S0.u16, S2.u16) else: D0.u16 = v_max_u16(S0.u16, S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHL_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_LSHL_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHL_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_AND_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32 | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_OR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32 | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 + S2.f16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 + S2.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 + S2.u16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 + S2.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 * S1.i16 + S2.i16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 * S1.i16 + S2.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f16) ^ sign(S2.f16)); - # if isNAN(64'F(S2.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then - # // 0/0 - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then - # // inf/inf - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then - # // x/0, or inf/y - # D0.f16 = sign_out ? -INF.f16 : +INF.f16 - # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then - # // x/inf, 0/y - # D0.f16 = sign_out ? -16'0.0 : 16'0.0 - # else - # D0.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FIXUP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f16) ^ sign(S2.f16)) if isNAN(F(S2.f16)): D0.f16 = F(cvtToQuietNAN(F(S2.f16))) @@ -16930,67 +5189,21 @@ def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f16 = ((-0.0) if (sign_out) else (0.0)) else: D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHL_ADD_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 == +0.0) && (S1.f64 == -0.0)) then - # D0.f64 = S1.f64 - # elsif ((S0.f64 == -0.0) && (S1.f64 == +0.0)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = S0.f64 < S1.f64 ? S0.f64 : S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(S0.f64)): D0.f64 = cvtToQuietNAN(S0.f64) elif (WAVE_MODE.IEEE and isSignalNAN(S1.f64)): @@ -17005,33 +5218,9 @@ def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f64 = S0.f64 else: D0.f64 = ((S0.f64) if (S0.f64 < S1.f64) else (S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 == +0.0) && (S1.f64 == -0.0)) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 == -0.0) && (S1.f64 == +0.0)) then - # D0.f64 = S1.f64 - # elsif WAVE_MODE.IEEE then - # D0.f64 = S0.f64 >= S1.f64 ? S0.f64 : S1.f64 - # else - # D0.f64 = S0.f64 > S1.f64 ? S0.f64 : S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(S0.f64)): D0.f64 = cvtToQuietNAN(S0.f64) elif (WAVE_MODE.IEEE and isSignalNAN(S1.f64)): @@ -17048,155 +5237,55 @@ def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f64 = ((S0.f64) if (S0.f64 >= S1.f64) else (S1.f64)) else: D0.f64 = ((S0.f64) if (S0.f64 > S1.f64) else (S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * 2.0 ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LDEXP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 * S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_LO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 * S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * 2.0F ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LDEXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # lane = S1.u32[5 : 0]; - # // Lane select - # D0.b32 = VGPR[lane][SRC0.u32] - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_READLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- lane = S1.u32[5 : 0] D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32; - # for i in 0 : 31 do - # tmp += S0[i].u32; - # // count i'th bit - # endfor; - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BCNT_U32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32) for i in range(0, int(31)+1): tmp += S0[i].u32 D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 << S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 << S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHRREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (S1.i64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ASHRREV_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (S1.i64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # shift = 32'I(S1[4 : 0].u32) * 53; - # if exponent(S0.f64) > 1077 then - # shift += exponent(S0.f64) - 1077 - # endif; - # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} - # // b_1200 is the MSB of the fractional part of 2.0/PI - # // Left shift operation indicates which bits are brought - # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); - # scale = -53 - shift; - # if exponent(S0.f64) >= 1968 then - # scale += 128 - # endif; - # D0.f64 = ldexp(result, scale) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_TRIG_PREOP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): shift = (S1[4 : 0].u32) * 53 if exponent(S0.f64) > 1077: shift += exponent(S0.f64) - 1077 @@ -17205,353 +5294,129 @@ def _VOP3AOp_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal if exponent(S0.f64) >= 1968: scale += 128 D0.f64 = ldexp(result, scale) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); - # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PKNORM_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f32_to_snorm(S0.f32) tmp[31 : 16].i16 = f32_to_snorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); - # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PKNORM_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f32_to_unorm(S0.f32) tmp[31 : 16].u16 = f32_to_unorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PKRTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = u32_to_u16(S0.u32); - # tmp[31 : 16].u16 = u32_to_u16(S1.u32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PK_U16_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = u32_to_u16(S0.u32) tmp[31 : 16].u16 = u32_to_u16(S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = i32_to_i16(S0.i32); - # tmp[31 : 16].i16 = i32_to_i16(S1.i32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PK_I16_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = i32_to_i16(S0.i32) tmp[31 : 16].i16 = i32_to_i16(S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); - # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PKNORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f16_to_snorm(S0.f16) tmp[31 : 16].i16 = f16_to_snorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); - # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PKNORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f16_to_unorm(S0.f16) tmp[31 : 16].u16 = f16_to_unorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 + S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 + S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 + S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 + S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 - S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 - S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 16].f16 = S1.f16; - # D0[15 : 0].f16 = S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_PACK_B32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 16].f16 = S1.f16 D0[15 : 0].f16 = S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_LEGACY_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.f32; - # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); - # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT2C_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.f32) tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16) tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 16'0U; - # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 16'U(~S0.b16 & ~S1.b16 & ~S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 16'U(~S0.b16 & ~S1.b16 & S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x4) != 0 ? 16'U(~S0.b16 & S1.b16 & ~S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x8) != 0 ? 16'U(~S0.b16 & S1.b16 & S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x10) != 0 ? 16'U(S0.b16 & ~S1.b16 & ~S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x20) != 0 ? 16'U(S0.b16 & ~S1.b16 & S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x40) != 0 ? 16'U(S0.b16 & S1.b16 & ~S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x80) != 0 ? 16'U(S0.b16 & S1.b16 & S2.b16) : 16'0U)); - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg(0) - tmp = Reg((tmp | ((TTBL.b32 & 0x1) != 0 ? (~S0.b16 & ~S1.b16 & ~S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x2) != 0 ? (~S0.b16 & ~S1.b16 & S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x4) != 0 ? (~S0.b16 & S1.b16 & ~S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x8) != 0 ? (~S0.b16 & S1.b16 & S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x10) != 0 ? (S0.b16 & ~S1.b16 & ~S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x20) != 0 ? (S0.b16 & ~S1.b16 & S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x40) != 0 ? (S0.b16 & S1.b16 & ~S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x80) != 0 ? (S0.b16 & S1.b16 & S2.b16) : 0))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 32'U(~S0.b32 & ~S1.b32 & ~S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 32'U(~S0.b32 & ~S1.b32 & S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x4) != 0 ? 32'U(~S0.b32 & S1.b32 & ~S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x8) != 0 ? 32'U(~S0.b32 & S1.b32 & S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x10) != 0 ? 32'U(S0.b32 & ~S1.b32 & ~S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x20) != 0 ? 32'U(S0.b32 & ~S1.b32 & S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x40) != 0 ? 32'U(S0.b32 & S1.b32 & ~S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x80) != 0 ? 32'U(S0.b32 & S1.b32 & S2.b32) : 0U)); - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg(0) - tmp = Reg((tmp | ((TTBL.b32 & 0x1) != 0 ? (~S0.b32 & ~S1.b32 & ~S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x2) != 0 ? (~S0.b32 & ~S1.b32 & S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x4) != 0 ? (~S0.b32 & S1.b32 & ~S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x8) != 0 ? (~S0.b32 & S1.b32 & S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x10) != 0 ? (S0.b32 & ~S1.b32 & ~S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x20) != 0 ? (S0.b32 & ~S1.b32 & S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x40) != 0 ? (S0.b32 & S1.b32 & ~S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x80) != 0 ? (S0.b32 & S1.b32 & S2.b32) : 0))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp0 = f32_to_fp8_scale(S0.f32, scale.u8); - # tmp1 = f32_to_fp8_scale(S1.f32, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) tmp0 = f32_to_fp8_scale(S0.f32, scale.u8) tmp1 = f32_to_fp8_scale(S1.f32, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp0 = f32_to_bf8_scale(S0.f32, scale.u8); - # tmp1 = f32_to_bf8_scale(S1.f32, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) tmp0 = f32_to_bf8_scale(S0.f32, scale.u8) tmp1 = f32_to_bf8_scale(S1.f32, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = f32_to_fp8_sr_scale(S0.f32, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(f32_to_fp8_sr_scale(S0.f32, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = f32_to_bf8_sr_scale(S0.f32, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(f32_to_bf8_sr_scale(S0.f32, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[31 : 0].f32 = tmp0; - # D0[63 : 32].f32 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17559,20 +5424,10 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[31 : 0].f32 = tmp0 D0[63 : 32].f32 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[31 : 0].f32 = tmp0; - # D0[63 : 32].f32 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17580,75 +5435,36 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[31 : 0].f32 = tmp0 D0[63 : 32].f32 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; - # tmp = fp8_to_f32_scale(src, scale.u8); - S1 = Reg(s1) - tmp = Reg(0) - laneId = lane +def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) srcbyte = OPSEL[1 : 0].i32 * 8 src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8 tmp = Reg(fp8_to_f32_scale(src, scale.u8)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; - # tmp = bf8_to_f32_scale(src, scale.u8); - S1 = Reg(s1) - tmp = Reg(0) - laneId = lane +def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) srcbyte = OPSEL[1 : 0].i32 * 8 src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8 tmp = Reg(bf8_to_f32_scale(src, scale.u8)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp0 = f32_to_fp4_scale(S0.f32, scale.u8); - # tmp1 = f32_to_fp4_scale(S1.f32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) tmp0 = f32_to_fp4_scale(S0.f32, scale.u8) tmp1 = f32_to_fp4_scale(S1.f32, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # tmp0 = f32_to_fp4_sr_scale(S0[31 : 0].f32, randomVal, scale.u8); - # tmp1 = f32_to_fp4_sr_scale(S0[63 : 32].f32, randomVal, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) @@ -17656,20 +5472,10 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_m tmp0 = f32_to_fp4_sr_scale(S0[31 : 0].f32, randomVal, scale.u8) tmp1 = f32_to_fp4_sr_scale(S0[63 : 32].f32, randomVal, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; - # D0[31 : 0].f32 = tmp0; - # D0[63 : 32].f32 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17677,160 +5483,70 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8 D0[31 : 0].f32 = tmp0 D0[63 : 32].f32 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = f16_to_fp8_scale(S0[15 : 0].f16, scale.u8); - # tmp1 = f16_to_fp8_scale(S0[31 : 16].f16, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = f16_to_fp8_scale(S0[15 : 0].f16, scale.u8) tmp1 = f16_to_fp8_scale(S0[31 : 16].f16, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = f16_to_bf8_scale(S0[15 : 0].f16, scale.u8); - # tmp1 = f16_to_bf8_scale(S0[31 : 16].f16, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = f16_to_bf8_scale(S0[15 : 0].f16, scale.u8) tmp1 = f16_to_bf8_scale(S0[31 : 16].f16, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = f16_to_fp8_sr_scale(S0.f16, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(f16_to_fp8_sr_scale(S0.f16, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = f16_to_bf8_sr_scale(S0.f16, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(f16_to_bf8_sr_scale(S0.f16, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = bf16_to_fp8_scale(S0[15 : 0].bf16, scale.u8); - # tmp1 = bf16_to_fp8_scale(S0[31 : 16].bf16, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = bf16_to_fp8_scale(S0[15 : 0].bf16, scale.u8) tmp1 = bf16_to_fp8_scale(S0[31 : 16].bf16, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = bf16_to_bf8_scale(S0[15 : 0].bf16, scale.u8); - # tmp1 = bf16_to_bf8_scale(S0[31 : 16].bf16, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = bf16_to_bf8_scale(S0[15 : 0].bf16, scale.u8) tmp1 = bf16_to_bf8_scale(S0[31 : 16].bf16, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = bf16_to_fp8_sr_scale(S0.bf16, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(bf16_to_fp8_sr_scale(S0.bf16, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = bf16_to_bf8_sr_scale(S0.bf16, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(bf16_to_bf8_sr_scale(S0.bf16, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[15 : 0].f16 = tmp0; - # D0[31 : 16].f16 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17838,20 +5554,10 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[15 : 0].f16 = tmp0 D0[31 : 16].f16 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[15 : 0].f16 = tmp0; - # D0[31 : 16].f16 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17859,94 +5565,45 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[15 : 0].f16 = tmp0 D0[31 : 16].f16 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; - # tmp = fp8_to_f16_scale(src, scale.u8); - # // OPSEL[3] controls destination hi/lo - S1 = Reg(s1) - tmp = Reg(0) - laneId = lane +def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) srcbyte = OPSEL[1 : 0].i32 * 8 src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8 tmp = Reg(fp8_to_f16_scale(src, scale.u8)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; - # tmp = bf8_to_f16_scale(src, scale.u8); - # // OPSEL[3] controls destination hi/lo - S1 = Reg(s1) - tmp = Reg(0) - laneId = lane +def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) srcbyte = OPSEL[1 : 0].i32 * 8 src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8 tmp = Reg(bf8_to_f16_scale(src, scale.u8)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = f16_to_fp4_scale(S0[15 : 0].f16, scale.u8); - # tmp1 = f16_to_fp4_scale(S0[31 : 16].f16, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = f16_to_fp4_scale(S0[15 : 0].f16, scale.u8) tmp1 = f16_to_fp4_scale(S0[31 : 16].f16, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = bf16_to_fp4_scale(S0[15 : 0].bf16, scale.u8); - # tmp1 = bf16_to_fp4_scale(S0[31 : 16].bf16, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = bf16_to_fp4_scale(S0[15 : 0].bf16, scale.u8) tmp1 = bf16_to_fp4_scale(S0[31 : 16].bf16, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # tmp0 = f16_to_fp4_sr_scale(S0[15 : 0].f16, randomVal, scale.u8); - # tmp1 = f16_to_fp4_sr_scale(S0[31 : 16].f16, randomVal, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) @@ -17954,20 +5611,9 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_m tmp0 = f16_to_fp4_sr_scale(S0[15 : 0].f16, randomVal, scale.u8) tmp1 = f16_to_fp4_sr_scale(S0[31 : 16].f16, randomVal, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # tmp0 = bf16_to_fp4_sr_scale(S0[15 : 0].bf16, randomVal, scale.u8); - # tmp1 = bf16_to_fp4_sr_scale(S0[31 : 16].bf16, randomVal, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) @@ -17975,20 +5621,10 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_ tmp0 = bf16_to_fp4_sr_scale(S0[15 : 0].bf16, randomVal, scale.u8) tmp1 = bf16_to_fp4_sr_scale(S0[31 : 16].bf16, randomVal, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; - # D0[15 : 0].f16 = tmp0; - # D0[31 : 16].f16 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17996,20 +5632,10 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8 D0[15 : 0].f16 = tmp0 D0[31 : 16].f16 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; - # D0[15 : 0].bf16 = tmp0; - # D0[31 : 16].bf16 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -18017,446 +5643,42 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mas src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8 D0[15 : 0].bf16 = tmp0 D0[31 : 16].bf16 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # declare tmp : 192'B; - # for pass in 0 : 15 do - # // Note that S0 and S1 inputs are interleaved in the packed result. - # tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8); - # tmp[dOffset + 11 : dOffset + 6].fp6 = f32_to_fp6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - for pass in range(0, int(15)+1): - tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8) - tmp[dOffset + 11 : dOffset + 6].fp6 = f32_to_fp6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # declare tmp : 192'B; - # for pass in 0 : 15 do - # // Note that S0 and S1 inputs are interleaved in the packed result. - # tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8); - # tmp[dOffset + 11 : dOffset + 6].bf6 = f32_to_bf6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - for pass in range(0, int(15)+1): - tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8) - tmp[dOffset + 11 : dOffset + 6].bf6 = f32_to_bf6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 1024'B; - # for pass in 0 : 31 do - # tmp[dOffset + 31 : dOffset].f32 = fp6_to_f32_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - # endfor; - # D0[1023 : 0] = tmp.b1024 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 31 : dOffset].f32 = fp6_to_f32_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - D0[1023 : 0] = tmp.b1024 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 1024'B; - # for pass in 0 : 31 do - # tmp[dOffset + 31 : dOffset].f32 = bf6_to_f32_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - # endfor; - # D0[1023 : 0] = tmp.b1024 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 31 : dOffset].f32 = bf6_to_f32_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - D0[1023 : 0] = tmp.b1024 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].fp6 = bf16_to_fp6_scale(S0[sOffset + 15 : sOffset].bf16, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].fp6 = bf16_to_fp6_scale(S0[sOffset + 15 : sOffset].bf16, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_scale(S0[sOffset + 15 : sOffset].f16, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_scale(S0[sOffset + 15 : sOffset].f16, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = bf16_to_bf6_scale(S0[sOffset + 15 : sOffset].bf16, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = bf16_to_bf6_scale(S0[sOffset + 15 : sOffset].bf16, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].fp6 = f16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].fp6 = f16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].fp6 = bf16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].bf16, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].fp6 = bf16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].bf16, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = bf16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].bf16, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = bf16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].bf16, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 512'B; - # for pass in 0 : 31 do - # tmp[dOffset + 15 : dOffset].f16 = fp6_to_f16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - # endfor; - # D0[511 : 0] = tmp.b512 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 15 : dOffset].f16 = fp6_to_f16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - D0[511 : 0] = tmp.b512 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 512'B; - # for pass in 0 : 31 do - # tmp[dOffset + 15 : dOffset].bf16 = fp6_to_bf16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - # endfor; - # D0[511 : 0] = tmp.b512 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 15 : dOffset].bf16 = fp6_to_bf16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - D0[511 : 0] = tmp.b512 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 512'B; - # for pass in 0 : 31 do - # tmp[dOffset + 15 : dOffset].f16 = bf6_to_f16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - # endfor; - # D0[511 : 0] = tmp.b512 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 15 : dOffset].f16 = bf6_to_f16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - D0[511 : 0] = tmp.b512 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 512'B; - # for pass in 0 : 31 do - # tmp[dOffset + 15 : dOffset].bf16 = bf6_to_bf16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - # endfor; - # D0[511 : 0] = tmp.b512 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 15 : dOffset].bf16 = bf6_to_bf16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - D0[511 : 0] = tmp.b512 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 16'B; - # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32); - # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32); - # D0[15 : 0] = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_ASHR_PK_I8_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32) tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32) D0[15 : 0] = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 16'B; - # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32); - # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32); - # D0[15 : 0] = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_ASHR_PK_U8_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32) tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32) D0[15 : 0] = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PK_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PK_BF16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].bf16 = f32_to_bf16(S0.f32); - # tmp[31 : 16].bf16 = f32_to_bf16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PK_BF16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].bf16 = f32_to_bf16(S0.f32) tmp[31 : 16].bf16 = f32_to_bf16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[15 : 0].bf16 = tmp0.bf16; - # D0[31 : 16].bf16 = tmp1.bf16 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -18464,20 +5686,10 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mas src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[15 : 0].bf16 = tmp0.bf16 D0[31 : 16].bf16 = tmp1.bf16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[15 : 0].bf16 = tmp0.bf16; - # D0[31 : 16].bf16 = tmp1.bf16 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -18485,33 +5697,15 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mas src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[15 : 0].bf16 = tmp0.bf16 D0[31 : 16].bf16 = tmp1.bf16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 32'F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MINIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 32'F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAXIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3AOp_FUNCTIONS = { VOP3AOp.V_CMP_CLASS_F32: _VOP3AOp_V_CMP_CLASS_F32, @@ -18920,8 +6114,6 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_PACK_B32_F16: _VOP3AOp_V_PACK_B32_F16, VOP3AOp.V_MUL_LEGACY_F32: _VOP3AOp_V_MUL_LEGACY_F32, VOP3AOp.V_DOT2C_F32_BF16: _VOP3AOp_V_DOT2C_F32_BF16, - VOP3AOp.V_BITOP3_B16: _VOP3AOp_V_BITOP3_B16, - VOP3AOp.V_BITOP3_B32: _VOP3AOp_V_BITOP3_B32, VOP3AOp.V_CVT_SCALEF32_PK_FP8_F32: _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32, VOP3AOp.V_CVT_SCALEF32_PK_BF8_F32: _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32, VOP3AOp.V_CVT_SCALEF32_SR_FP8_F32: _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32, @@ -18951,23 +6143,6 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_CVT_SCALEF32_SR_PK_FP4_BF16: _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16, VOP3AOp.V_CVT_SCALEF32_PK_F16_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4, VOP3AOp.V_CVT_SCALEF32_PK_BF16_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4, - VOP3AOp.V_CVT_SCALEF32_2XPK16_FP6_F32: _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32, - VOP3AOp.V_CVT_SCALEF32_2XPK16_BF6_F32: _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_FP6_F32: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_F32: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32, - VOP3AOp.V_CVT_SCALEF32_PK32_F32_FP6: _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6, - VOP3AOp.V_CVT_SCALEF32_PK32_F32_BF6: _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6, - VOP3AOp.V_CVT_SCALEF32_PK32_FP6_BF16: _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16, - VOP3AOp.V_CVT_SCALEF32_PK32_BF6_F16: _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16, - VOP3AOp.V_CVT_SCALEF32_PK32_BF6_BF16: _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_FP6_F16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_FP6_BF16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_F16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_BF16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16, - VOP3AOp.V_CVT_SCALEF32_PK32_F16_FP6: _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6, - VOP3AOp.V_CVT_SCALEF32_PK32_BF16_FP6: _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6, - VOP3AOp.V_CVT_SCALEF32_PK32_F16_BF6: _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6, - VOP3AOp.V_CVT_SCALEF32_PK32_BF16_BF6: _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6, VOP3AOp.V_ASHR_PK_I8_I32: _VOP3AOp_V_ASHR_PK_I8_I32, VOP3AOp.V_ASHR_PK_U8_I32: _VOP3AOp_V_ASHR_PK_U8_I32, VOP3AOp.V_CVT_PK_F16_F32: _VOP3AOp_V_CVT_PK_F16_F32, @@ -18978,162 +6153,44 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_MAXIMUM3_F32: _VOP3AOp_V_MAXIMUM3_F32, } -def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3BOp_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3BOp_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32; - # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3BOp_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32) VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3BOp_V_ADDC_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3BOp_V_SUBB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3BOp_V_SUBBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # D0.f32 = NAN.f32 - # elsif exponent(S2.f32) - exponent(S1.f32) >= 96 then - # // N/D near MAX_FLOAT_F32 - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif S1.f32 == DENORM.f32 then - # D0.f32 = ldexp(S0.f32, 64) - # elsif ((1.0 / 64'F(S1.f32) == DENORM.f64) && (S2.f32 / S1.f32 == DENORM.f32)) then - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif 1.0 / 64'F(S1.f32) == DENORM.f64 then - # D0.f32 = ldexp(S0.f32, -64) - # elsif S2.f32 / S1.f32 == DENORM.f32 then - # VCC = 0x1LL; - # if S0.f32 == S2.f32 then - # // Only scale the numerator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif exponent(S2.f32) <= 23 then - # // Numerator is tiny - # D0.f32 = ldexp(S0.f32, 64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3BOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)): @@ -19156,47 +6213,10 @@ def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64) if S1.f32 == DENORM.f32: D0.f32 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then - # D0.f64 = NAN.f64 - # elsif exponent(S2.f64) - exponent(S1.f64) >= 768 then - # // N/D near MAX_FLOAT_F64 - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, 128) - # elsif ((1.0 / S1.f64 == DENORM.f64) && (S2.f64 / S1.f64 == DENORM.f64)) then - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif 1.0 / S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, -128) - # elsif S2.f64 / S1.f64 == DENORM.f64 then - # VCC = 0x1LL; - # if S0.f64 == S2.f64 then - # // Only scale the numerator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif exponent(S2.f64) <= 53 then - # // Numerator is tiny - # D0.f64 = ldexp(S0.f64, 128) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3BOp_V_DIV_SCALE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((S2.f64 == 0.0) or (S1.f64 == 0.0)): @@ -19219,45 +6239,23 @@ def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f64 = ldexp(S0.f64, 128) if S1.f64 == DENORM.f64: D0.f64 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3BOp_V_MAD_U64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.u32) * (S1.u32) + (S2.u64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3BOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3BOp_V_MAD_I64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.i32) * (S1.i32) + (S2.i64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} VOP3BOp_FUNCTIONS = { VOP3BOp.V_ADD_CO_U32: _VOP3BOp_V_ADD_CO_U32, diff --git a/extra/assembly/amd/autogen/rdna3/gen_pcode.py b/extra/assembly/amd/autogen/rdna3/gen_pcode.py index 3c9c0a93f9..fa9392de7a 100644 --- a/extra/assembly/amd/autogen/rdna3/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna3/gen_pcode.py @@ -5,1298 +5,449 @@ from extra.assembly.amd.autogen.rdna3.enum import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b32 = S0.b32 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b64 = S0.b64 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[63 : 0] = S0.u64[0 : 63] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[63 : 0] = S0.u64[0 : 63] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from LSB - # if S0.u64[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CTZ_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from MSB - # if S0.u64[63 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLZ_I32_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[63 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.u32[31 - i] != S0.u32[31] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 63 do - # // Search from MSB - # if S0.u64[63 - i] != S0.u64[63] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLS_I32_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != S0.u64[63]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i8)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i8)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32; - # for i in 0 : 31 do - # D0.u64[i * 2] = tmp[i]; - # D0.u64[i * 2 + 1] = tmp[i] - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITREPLICATE_B64_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32) for i in range(0, int(31)+1): D0.u64[i * 2] = tmp[i] D0.u64[i * 2 + 1] = tmp[i] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_ABS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # for i in 0 : 7 do - # tmp[i] = S0.u32[i * 4 +: 4] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(7)+1): tmp[i] = S0.u32[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # for i in 0 : 15 do - # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(15)+1): tmp[i] = S0.u64[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # declare i : 6'U; - # for i in 6'0U : 6'31U do - # tmp[i] = S0.u32[i & 6'60U +: 6'4U] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp[i] = S0.u32[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # declare i : 6'U; - # for i in 6'0U : 6'63U do - # tmp[i] = S0.u64[i & 6'60U +: 6'4U] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp[i] = S0.u64[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~S0.u64; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~S0.u64 SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 ^ EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 ^ EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u32; - # EXEC.u32 = (~S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u32; - # EXEC.u32 = (~S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (~S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 & ~EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 | ~EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 | ~EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u32 = (~S0.u32 & EXEC.u32); - # D0.u32 = EXEC.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_WREXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = EXEC.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u32 = (S0.u32 & ~EXEC.u32); - # D0.u32 = EXEC.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_WREXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = EXEC.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_GETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = PC + 4 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # jump_addr = S0.i64; - # D0.i64 = PC + 4LL; - # PC = jump_addr.i64 - S0 = Reg(s0) - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SWAPPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): jump_addr = S0.i64 D0.i64 = PC + 4 PC = Reg(jump_addr.i64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} -def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_RFE_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If SDST is VCC then VCCZ is undefined. - VCC = Reg(vcc) - VCCZ = Reg(1 if VCC._val == 0 else 0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result +def _SOP1Op_S_SENDMSG_RTN_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If SDST is VCC then VCCZ is undefined. - VCC = Reg(vcc) - VCCZ = Reg(1 if VCC._val == 0 else 0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result +def _SOP1Op_S_SENDMSG_RTN_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0[31 : 16].f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_HI_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} SOP1Op_FUNCTIONS = { SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, @@ -1376,815 +527,282 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16, } -def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 + S1.i32; - # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 + S1.i32) SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 - S1.i32; - # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 - S1.i32) SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADDC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - SCC.u32; - # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUBB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - SCC.u32) SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32; - # if D0.i32 < 0 then - # D0.i32 = -D0.i32 - # endif; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ABSDIFF_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 if D0.i32 < 0: D0.i32 = -D0.i32 SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 << S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 << S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 >> S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 >> S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 >> S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 >> S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 1) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 2) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 3) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 4) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_NOT1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_NOT1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_NOT1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_NOT1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); - # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 * S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 * S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = SCC ? S0.u64 : S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -2207,44 +825,9 @@ def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -2267,127 +850,45 @@ def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _SOP2Op_S_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _SOP2Op_S_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -2410,44 +911,9 @@ def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -2470,31 +936,15 @@ def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} SOP2Op_FUNCTIONS = { SOP2Op.S_ADD_U32: _SOP2Op_S_ADD_U32, @@ -2566,523 +1016,189 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_FMAC_F16: _SOP2Op_S_FMAC_F16, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 == S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 != S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 > S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 <= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 == S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 != S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 > S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 <= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 == S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 != S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg((isNAN(F(S0.f32)) or isNAN(F(S1.f32)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg((isNAN(F(S0.f16)) or isNAN(F(S1.f16)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 >= S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 >= S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 != S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 != S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 > S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 > S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 <= S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 <= S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 == S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 == S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 < S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 < S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, @@ -3133,211 +1249,114 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(SIMM16.i16)) - D0 = Reg(d0) +def _SOPKOp_S_MOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- D0.i32 = (signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Do nothing - for use by tools only - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result +def _SOPKOp_S_VERSION(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.i32 = 32'I(signext(SIMM16.i16)) - # endif - D0 = Reg(d0) - SCC = Reg(scc) +def _SOPKOp_S_CMOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- if SCC: D0.i32 = (signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) == signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) == signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) != signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) != signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) > signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) > signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) >= signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) >= signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) < signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) < signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) <= signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) <= signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 == (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 != 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 != (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 > (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 >= (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 < (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 <= (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # D0.i32 = 32'I(64'I(D0.i32) + signext(SIMM16.i16)); - # SCC = ((tmp[31] == SIMM16.i16[15]) && (tmp[31] != D0.i32[31])); - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) +def _SOPKOp_S_ADDK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- tmp = Reg(D0.i32) D0.i32 = ((D0.i32) + signext(SIMM16.i16)) SCC = Reg(((tmp[31] == SIMM16.i16[15]) and (tmp[31] != D0.i32[31]))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(64'I(D0.i32) * signext(SIMM16.i16)) - D0 = Reg(d0) +def _SOPKOp_S_MULK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- D0.i32 = ((D0.i32) * signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL; - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - D0 = Reg(d0) +def _SOPKOp_S_CALL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- D0.i64 = PC + 4 PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, @@ -3360,259 +1379,118 @@ SOPKOp_FUNCTIONS = { SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for i in 0U : SIMM16.u16[3 : 0].u32 do - # endfor +def _SOPPOp_S_NOP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): pass - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # instruction may be omitted. For wave64 the compiler may not know the status of the EXEC mask and hence - # // 1 cycle delay here - # // 2 cycles delay here - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result +def _SOPPOp_S_DELAY_ALU(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // PC passed into trap handler points to S_TRAP itself, - # PC = TBA.i64; - # // trap base address - PC = Reg(pc) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result +def _SOPPOp_S_TRAP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {'PC': PC} -def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; +def _SOPPOp_S_BRANCH(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 1 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 0 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if WAVE_STATUS.COND_DBG_SYS.u32 != 0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if WAVE_STATUS.COND_DBG_SYS.u32 != 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGUSER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if WAVE_STATUS.COND_DBG_USER.u32 != 0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGUSER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if WAVE_STATUS.COND_DBG_USER.u32 != 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_STATUS.COND_DBG_SYS || WAVE_STATUS.COND_DBG_USER) then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER): PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_STATUS.COND_DBG_SYS && WAVE_STATUS.COND_DBG_USER) then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER): PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, @@ -3631,40 +1509,11 @@ SOPPOp_FUNCTIONS = { SOPPOp.S_CBRANCH_CDBGSYS_AND_USER: _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE64 then - # // 64 lanes - # if EXEC == 0x0LL then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b64(EXEC)); - # // Lowest active lane - # endif - # else - # // 32 lanes - # if EXEC_LO.i32 == 0 then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); - # // Lowest active lane - # endif - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP1Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) EXEC_LO = SliceProxy(EXEC, 31, 0) # --- compiled pseudocode --- @@ -3679,914 +1528,363 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = S0.b16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = S0.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # addr = SRC0.u32; - # // Raw value from instruction - # D0.b32 = VGPR[laneId][addr].b32 - D0 = Reg(d0) - laneId = lane +def _VOP1Op_V_MOVRELS_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = { SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16) } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = _pack(SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b32; - # D0.b32 = S0.b32; - # S0.b32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.b32) D0.b32 = S0.b32 S0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b16; - # D0.b16 = S0.b16; - # S0.b16 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.b16) D0.b16 = S0.b16 S0.b16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = ~S0.u16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ~S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { 16'0, S0.u16 } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(0, S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} VOP1Op_FUNCTIONS = { VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, @@ -4669,199 +1967,64 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_U32_U16: _VOP1Op_V_CVT_U32_U16, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP2Op_V_DOT2ACC_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT2ACC_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = S2.f32 - # else - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = S2.f32 else: D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -4884,44 +2047,9 @@ def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -4944,387 +2072,139 @@ def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -5347,44 +2227,9 @@ def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -5407,33 +2252,16 @@ def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16); - # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP2Op_FUNCTIONS = { VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32, @@ -5484,1937 +2312,375 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, } -def _VOP3Op_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -7428,54 +2694,9 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -7489,54 +2710,9 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -7550,1245 +2726,377 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 < S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f64 == S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 > S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <> S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 >= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 < S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i16 == S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 > S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <> S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 >= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 < S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u16 == S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 > S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <> S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 >= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 < S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i64 == S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 > S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <> S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 >= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 < S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 > S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 >= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -8802,46 +3110,9 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -8855,46 +3126,9 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -8908,45 +3142,13 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE64 then - # // 64 lanes - # if EXEC == 0x0LL then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b64(EXEC)); - # // Lowest active lane - # endif - # else - # // 32 lanes - # if EXEC_LO.i32 == 0 then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); - # // Lowest active lane - # endif - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP3Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) EXEC_LO = SliceProxy(EXEC, 31, 0) # --- compiled pseudocode --- @@ -8961,1060 +3163,403 @@ def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = S0.b16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MOV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = S0.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # addr = SRC0.u32; - # // Raw value from instruction - # D0.b32 = VGPR[laneId][addr].b32 - D0 = Reg(d0) - laneId = lane +def _VOP3Op_V_MOVRELS_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = { SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16) } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = _pack(SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = ~S0.u16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_NOT_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ~S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { 16'0, S0.u16 } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(0, S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = S2.f32 - # else - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = S2.f32 else: D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -10037,44 +3582,9 @@ def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -10097,279 +3607,97 @@ def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -10392,44 +3720,9 @@ def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -10452,95 +3745,28 @@ def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = S2.f32 - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = S2.f32 else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = 5.0F - # else - # D0.f32 = 4.0F - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = 3.0F - # else - # D0.f32 = 2.0F - # endif - # else - # if S0.f32 < 0.0F then - # D0.f32 = 1.0F - # else - # D0.f32 = 0.0F - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBEID_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = 5.0 @@ -10556,36 +3782,9 @@ def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = 1.0 else: D0.f32 = 0.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap S coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = -S0.f32 - # else - # D0.f32 = S0.f32 - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S0.f32 - # else - # if S0.f32 < 0.0F then - # D0.f32 = S2.f32 - # else - # D0.f32 = -S2.f32 - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBESC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = -S0.f32 @@ -10598,32 +3797,9 @@ def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = S2.f32 else: D0.f32 = -S2.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap T coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = -S1.f32 - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = -S2.f32 - # else - # D0.f32 = S2.f32 - # endif - # else - # D0.f32 = -S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBETC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = -S1.f32 elif abs(S1.f32) >= abs(S0.f32): @@ -10633,254 +3809,88 @@ def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = S2.f32 else: D0.f32 = -S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = 2.0 * cubemap major axis. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = S2.f32 * 2.0F - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S1.f32 * 2.0F - # else - # D0.f32 = S0.f32 * 2.0F - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBEMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = S2.f32 * 2.0 elif abs(S1.f32) >= abs(S0.f32): D0.f32 = S1.f32 * 2.0 else: D0.f32 = S0.f32 * 2.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFI_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); - # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); - # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); - # tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1U); - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_LERP_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24)) tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16) tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8) tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0].u32) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ALIGNBIT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0].u32) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0].u32 * 8U)) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ALIGNBYTE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0].u32 * 8)) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || - # isNAN(64'F(S2.f32))) then - # D0.f32 = -MAX_FLOAT_F32 - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MULLIT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S1.f32 == -MAX_FLOAT_F32) or (F(S1.f32) == (-INF)) or isNAN(F(S1.f32)) or (S2.f32 <= 0.0) or isNAN(F(S2.f32))): D0.f32 = -MAX_FLOAT_F32 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then - # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) - # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then - # D0.f32 = v_max_f32(S1.f32, S2.f32) - # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S1.f32 then - # D0.f32 = v_max_f32(S0.f32, S2.f32) - # else - # D0.f32 = v_max_f32(S0.f32, S1.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or isNAN(F(S2.f32))): D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) elif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32: @@ -10889,172 +3899,57 @@ def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = v_max_f32(S0.f32, S2.f32) else: D0.f32 = v_max_f32(S0.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then - # D0.i32 = v_max_i32(S1.i32, S2.i32) - # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then - # D0.i32 = v_max_i32(S0.i32, S2.i32) - # else - # D0.i32 = v_max_i32(S0.i32, S1.i32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32: D0.i32 = v_max_i32(S1.i32, S2.i32) elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32: D0.i32 = v_max_i32(S0.i32, S2.i32) else: D0.i32 = v_max_i32(S0.i32, S1.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then - # D0.u32 = v_max_u32(S1.u32, S2.u32) - # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then - # D0.u32 = v_max_u32(S0.u32, S2.u32) - # else - # D0.u32 = v_max_u32(S0.u32, S1.u32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32: D0.u32 = v_max_u32(S1.u32, S2.u32) elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32: D0.u32 = v_max_u32(S0.u32, S2.u32) else: D0.u32 = v_max_u32(S0.u32, S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])) tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])) tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])) tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_HI_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((v_sad_u8(S0, S1, 0)) << 16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); - # tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16) tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); - # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_PK_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8))))) tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f32) ^ sign(S2.f32)); - # if isNAN(64'F(S2.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif ((64'F(S1.f32) == 0.0) && (64'F(S2.f32) == 0.0)) then - # // 0/0 - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(abs(S1.f32)) == +INF) && (64'F(abs(S2.f32)) == +INF)) then - # // inf/inf - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(S1.f32) == 0.0) || (64'F(abs(S2.f32)) == +INF)) then - # // x/0, or inf/y - # D0.f32 = sign_out ? -INF.f32 : +INF.f32 - # elsif ((64'F(abs(S1.f32)) == +INF) || (64'F(S2.f32) == 0.0)) then - # // x/inf, 0/y - # D0.f32 = sign_out ? -0.0F : 0.0F - # elsif exponent(S2.f32) - exponent(S1.f32) < -150 then - # D0.f32 = sign_out ? -UNDERFLOW_F32 : UNDERFLOW_F32 - # elsif exponent(S1.f32) == 255 then - # D0.f32 = sign_out ? -OVERFLOW_F32 : OVERFLOW_F32 - # else - # D0.f32 = sign_out ? -abs(S0.f32) : abs(S0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f32) ^ sign(S2.f32)) if isNAN(F(S2.f32)): D0.f32 = F(cvtToQuietNAN(F(S2.f32))) @@ -11074,40 +3969,9 @@ def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) else: D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f64) ^ sign(S2.f64)); - # if isNAN(S2.f64) then - # D0.f64 = cvtToQuietNAN(S2.f64) - # elsif isNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif ((S1.f64 == 0.0) && (S2.f64 == 0.0)) then - # // 0/0 - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((abs(S1.f64) == +INF) && (abs(S2.f64) == +INF)) then - # // inf/inf - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((S1.f64 == 0.0) || (abs(S2.f64) == +INF)) then - # // x/0, or inf/y - # D0.f64 = sign_out ? -INF : +INF - # elsif ((abs(S1.f64) == +INF) || (S2.f64 == 0.0)) then - # // x/inf, 0/y - # D0.f64 = sign_out ? -0.0 : 0.0 - # elsif exponent(S2.f64) - exponent(S1.f64) < -1075 then - # D0.f64 = sign_out ? -UNDERFLOW_F64 : UNDERFLOW_F64 - # elsif exponent(S1.f64) == 2047 then - # D0.f64 = sign_out ? -OVERFLOW_F64 : OVERFLOW_F64 - # else - # D0.f64 = sign_out ? -abs(S0.f64) : abs(S0.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f64) ^ sign(S2.f64)) if isNAN(S2.f64): D0.f64 = cvtToQuietNAN(S2.f64) @@ -11127,90 +3991,32 @@ def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) else: D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FMAS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f32 = (2.0 ** 64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32) else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) - # else - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FMAS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64) else: D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += S1.u32[15 : 8] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += S1.u32[23 : 16] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += S1.u32[31 : 24] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_MSAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])))) tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])))) tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])))) tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_QSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -11218,21 +4024,9 @@ def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp[31 : 16] = (v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -11240,21 +4034,9 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal tmp[31 : 16] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); - # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); - # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); - # tmp[31 : 0] = 32'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)); - # D0.b128 = tmp.b128 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_MQSAD_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[127 : 96] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)) @@ -11262,187 +4044,64 @@ def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp[63 : 32] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)) tmp[31 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)) D0.b128 = tmp.b128 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 + S2.u16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 + S2.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); - # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); - # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); - # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_PERM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHL_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_LSHL_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then - # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) - # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then - # D0.f16 = v_max_f16(S1.f16, S2.f16) - # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S1.f16 then - # D0.f16 = v_max_f16(S0.f16, S2.f16) - # else - # D0.f16 = v_max_f16(S0.f16, S1.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))): D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) elif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16: @@ -11451,94 +4110,31 @@ def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = v_max_f16(S0.f16, S2.f16) else: D0.f16 = v_max_f16(S0.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then - # D0.i16 = v_max_i16(S1.i16, S2.i16) - # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then - # D0.i16 = v_max_i16(S0.i16, S2.i16) - # else - # D0.i16 = v_max_i16(S0.i16, S1.i16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16: D0.i16 = v_max_i16(S1.i16, S2.i16) elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16: D0.i16 = v_max_i16(S0.i16, S2.i16) else: D0.i16 = v_max_i16(S0.i16, S1.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then - # D0.u16 = v_max_u16(S1.u16, S2.u16) - # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then - # D0.u16 = v_max_u16(S0.u16, S2.u16) - # else - # D0.u16 = v_max_u16(S0.u16, S1.u16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16: D0.u16 = v_max_u16(S1.u16, S2.u16) elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16: D0.u16 = v_max_u16(S0.u16, S2.u16) else: D0.u16 = v_max_u16(S0.u16, S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 * S1.i16 + S2.i16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 * S1.i16 + S2.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f16) ^ sign(S2.f16)); - # if isNAN(64'F(S2.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then - # // 0/0 - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then - # // inf/inf - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then - # // x/0, or inf/y - # D0.f16 = sign_out ? -INF.f16 : +INF.f16 - # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then - # // x/inf, 0/y - # D0.f16 = sign_out ? -16'0.0 : 16'0.0 - # else - # D0.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f16) ^ sign(S2.f16)) if isNAN(F(S2.f16)): D0.f16 = F(cvtToQuietNAN(F(S2.f16))) @@ -11554,576 +4150,211 @@ def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f16 = ((-0.0) if (sign_out) else (0.0)) else: D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHL_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32 | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32 | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u16) * (S1.u16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i16) * (S1.i16) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = VCC.u64[laneId] ? S1.u16 : S0.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CNDMASK_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S1.u16) if (VCC.u64[laneId]) else (S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f16; - # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; - # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; - # D0.f16 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_DOT2_F16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f16) tmp += S0[15 : 0].f16 * S1[15 : 0].f16 tmp += S0[31 : 16].f16 * S1[31 : 16].f16 D0.f16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.bf16; - # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; - # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; - # D0.bf16 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_DOT2_BF16_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.bf16) tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16 tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16 D0.bf16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 + S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 + S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 - S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 - S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); - # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16] = (v_cvt_i16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_i16_f32(S0.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); - # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16] = (v_cvt_u16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_u16_f32(S0.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 + S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 + S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 - S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 - S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 16].f16 = S1.f16; - # D0[15 : 0].f16 = S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_PACK_B32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 16].f16 = S1.f16 D0[15 : 0].f16 = S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); - # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f16_to_snorm(S0.f16) tmp[31 : 16].i16 = f16_to_snorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); - # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f16_to_unorm(S0.f16) tmp[31 : 16].u16 = f16_to_unorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * 2.0F ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32; - # for i in 0 : 31 do - # tmp += S0[i].u32; - # // count i'th bit - # endfor; - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_BCNT_U32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32) for i in range(0, int(31)+1): tmp += S0[i].u32 D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); - # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f32_to_snorm(S0.f32) tmp[31 : 16].i16 = f32_to_snorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); - # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f32_to_unorm(S0.f32) tmp[31 : 16].u16 = f32_to_unorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = u32_to_u16(S0.u32); - # tmp[31 : 16].u16 = u32_to_u16(S1.u32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_U16_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = u32_to_u16(S0.u32) tmp[31 : 16].u16 = u32_to_u16(S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = i32_to_i16(S0.i32); - # tmp[31 : 16].i16 = i32_to_i16(S1.i32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_I16_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = i32_to_i16(S0.i32) tmp[31 : 16].i16 = i32_to_i16(S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 + S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 + S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(S0.f64) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isSignalNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isQuietNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif isQuietNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif LT_NEG_ZERO(S0.f64, S1.f64) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - # else - # if isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif LT_NEG_ZERO(S0.f64, S1.f64) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(S0.f64): D0.f64 = cvtToQuietNAN(S0.f64) @@ -12146,45 +4377,9 @@ def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(S0.f64) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isSignalNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isQuietNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif isQuietNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif GT_NEG_ZERO(S0.f64, S1.f64) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - # else - # if isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif GT_NEG_ZERO(S0.f64, S1.f64) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(S0.f64): D0.f64 = cvtToQuietNAN(S0.f64) @@ -12207,74 +4402,25 @@ def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * 2.0 ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 * S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_LO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 * S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # shift = 32'I(S1[4 : 0].u32) * 53; - # if exponent(S0.f64) > 1077 then - # shift += exponent(S0.f64) - 1077 - # endif; - # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} - # // b_1200 is the MSB of the fractional part of 2.0/PI - # // Left shift operation indicates which bits are brought - # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); - # scale = -53 - shift; - # if exponent(S0.f64) >= 1968 then - # scale += 128 - # endif; - # D0.f64 = ldexp(result, scale) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRIG_PREOP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): shift = (S1[4 : 0].u32) * 53 if exponent(S0.f64) > 1077: shift += exponent(S0.f64) - 1077 @@ -12283,92 +4429,33 @@ def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if exponent(S0.f64) >= 1968: scale += 128 D0.f64 = ldexp(result, scale) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 << S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 << S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = (S1.i16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = (S1.i16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 << S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 << S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (S1.i64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (S1.i64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE32 then - # lane = S1.u32[4 : 0].u32; - # // Lane select for wave32 - # else - # lane = S1.u32[5 : 0].u32; - # // Lane select for wave64 - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3Op_V_READLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if WAVE32: @@ -12376,42 +4463,19 @@ def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V else: lane = S1.u32[5 : 0].u32 D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 & S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 & S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 | S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 | S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 ^ S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 ^ S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3Op_FUNCTIONS = { VOP3Op.V_CMP_F_F16: _VOP3Op_V_CMP_F_F16, @@ -12834,102 +4898,26 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_XOR_B16: _VOP3Op_V_XOR_B16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # D0.f32 = NAN.f32 - # elsif exponent(S2.f32) - exponent(S1.f32) >= 96 then - # // N/D near MAX_FLOAT_F32 - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif S1.f32 == DENORM.f32 then - # D0.f32 = ldexp(S0.f32, 64) - # elsif ((1.0 / 64'F(S1.f32) == DENORM.f64) && (S2.f32 / S1.f32 == DENORM.f32)) then - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif 1.0 / 64'F(S1.f32) == DENORM.f64 then - # D0.f32 = ldexp(S0.f32, -64) - # elsif S2.f32 / S1.f32 == DENORM.f32 then - # VCC = 0x1LL; - # if S0.f32 == S2.f32 then - # // Only scale the numerator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif exponent(S2.f32) <= 23 then - # // Numerator is tiny - # D0.f32 = ldexp(S0.f32, 64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)): @@ -12952,47 +4940,10 @@ def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64) if S1.f32 == DENORM.f32: D0.f32 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then - # D0.f64 = NAN.f64 - # elsif exponent(S2.f64) - exponent(S1.f64) >= 768 then - # // N/D near MAX_FLOAT_F64 - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, 128) - # elsif ((1.0 / S1.f64 == DENORM.f64) && (S2.f64 / S1.f64 == DENORM.f64)) then - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif 1.0 / S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, -128) - # elsif S2.f64 / S1.f64 == DENORM.f64 then - # VCC = 0x1LL; - # if S0.f64 == S2.f64 then - # // Only scale the numerator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif exponent(S2.f64) <= 53 then - # // Numerator is tiny - # D0.f64 = ldexp(S0.f64, 128) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3SDOp_V_DIV_SCALE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((S2.f64 == 0.0) or (S1.f64 == 0.0)): @@ -13015,105 +4966,41 @@ def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal D0.f64 = ldexp(S0.f64, 128) if S1.f64 == DENORM.f64: D0.f64 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3SDOp_V_MAD_U64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.u32) * (S1.u32) + (S2.u64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3SDOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3SDOp_V_MAD_I64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.i32) * (S1.i32) + (S2.i64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32; - # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32) VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_ADD_CO_CI_U32: _VOP3SDOp_V_ADD_CO_CI_U32, @@ -13128,373 +5015,175 @@ VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16 tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16 tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32) tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16)) tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16)) tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16)) tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16)) tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); - # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16) tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; - # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16 tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; - # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16 tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16) tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16) tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); - # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); - # tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8); - # tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8) tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8) tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8) tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); - # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); - # tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4); - # tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4); - # tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4); - # tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4); - # tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4); - # tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4) tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4) @@ -13505,28 +5194,14 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4) tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); - # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16) tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, @@ -13554,1937 +5229,375 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_DOT2_F32_BF16: _VOP3POp_V_DOT2_F32_BF16, } -def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -15498,54 +5611,9 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -15559,54 +5627,9 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -15620,1245 +5643,377 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 < S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f64 == S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 > S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <> S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 >= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 < S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i16 == S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 > S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <> S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 >= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 < S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u16 == S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 > S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <> S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 >= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 < S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i64 == S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 > S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <> S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 >= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 < S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 > S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 >= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -16872,46 +6027,9 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -16925,46 +6043,9 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -16978,10 +6059,7 @@ def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} VOPCOp_FUNCTIONS = { VOPCOp.V_CMP_F_F16: _VOPCOp_V_CMP_F_F16, diff --git a/extra/assembly/amd/autogen/rdna4/gen_pcode.py b/extra/assembly/amd/autogen/rdna4/gen_pcode.py index 988b5ccec5..c7331ddd59 100644 --- a/extra/assembly/amd/autogen/rdna4/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna4/gen_pcode.py @@ -5,1384 +5,452 @@ from extra.assembly.amd.autogen.rdna4.enum import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b32 = S0.b32 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b64 = S0.b64 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[63 : 0] = S0.u64[0 : 63] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[63 : 0] = S0.u64[0 : 63] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from LSB - # if S0.u64[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CTZ_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from MSB - # if S0.u64[63 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLZ_I32_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[63 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.u32[31 - i] != S0.u32[31] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 63 do - # // Search from MSB - # if S0.u64[63 - i] != S0.u64[63] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLS_I32_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != S0.u64[63]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i8)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i8)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32; - # for i in 0 : 31 do - # D0.u64[i * 2] = tmp[i]; - # D0.u64[i * 2 + 1] = tmp[i] - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITREPLICATE_B64_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32) for i in range(0, int(31)+1): D0.u64[i * 2] = tmp[i] D0.u64[i * 2 + 1] = tmp[i] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_ABS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # for i in 0 : 7 do - # tmp[i] = S0.u32[i * 4 +: 4] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(7)+1): tmp[i] = S0.u32[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # for i in 0 : 15 do - # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(15)+1): tmp[i] = S0.u64[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # declare i : 6'U; - # for i in 6'0U : 6'31U do - # tmp[i] = S0.u32[i & 6'60U +: 6'4U] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp[i] = S0.u32[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # declare i : 6'U; - # for i in 6'0U : 6'63U do - # tmp[i] = S0.u64[i & 6'60U +: 6'4U] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp[i] = S0.u64[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~S0.u64; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~S0.u64 SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 ^ EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 ^ EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u32; - # EXEC.u32 = (~S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u32; - # EXEC.u32 = (~S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (~S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 & ~EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 | ~EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 | ~EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u32 = (~S0.u32 & EXEC.u32); - # D0.u32 = EXEC.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_WREXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = EXEC.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u32 = (S0.u32 & ~EXEC.u32); - # D0.u32 = EXEC.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_WREXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = EXEC.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_GETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = PC + 4 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # jump_addr = S0.i64; - # D0.i64 = PC + 4LL; - # PC = jump_addr.i64 - S0 = Reg(s0) - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SWAPPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): jump_addr = S0.i64 D0.i64 = PC + 4 PC = Reg(jump_addr.i64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} -def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_RFE_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If SDST is VCC then VCCZ is undefined. - VCC = Reg(vcc) - VCCZ = Reg(1 if VCC._val == 0 else 0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result +def _SOP1Op_S_SENDMSG_RTN_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If SDST is VCC then VCCZ is undefined. - VCC = Reg(vcc) - VCCZ = Reg(1 if VCC._val == 0 else 0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result +def _SOP1Op_S_SENDMSG_RTN_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if !InWorkgroup() then - # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then - # elsif barrierNumber == 0 then - # else - # BARRIER_STATE[barrierNumber & 63].signalCnt += 7'1U - # endif; - # --- compiled pseudocode --- - if not InWorkgroup(): - pass - elif ((barrierNumber == -2) and not WAVE_STATUS.PRIV): - pass - elif barrierNumber == 0: - pass - else: - BARRIER_STATE[barrierNumber & 63].signalCnt += 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result +def _SOP1Op_S_SLEEP_VAR(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if !InWorkgroup() then - # SCC = 1'0U - # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then - # SCC = 1'0U - # elsif barrierNumber == 0 then - # SCC = 1'0U - # else - # // Set SCC if this is the first signaling event for this barrier. - # SCC = BARRIER_STATE[barrierNumber & 63].signalCnt.u32 == 0U; - # BARRIER_STATE[barrierNumber & 63].signalCnt += 7'1U - # endif; - SCC = Reg(scc) - # --- compiled pseudocode --- - if not InWorkgroup(): - SCC = Reg(0) - elif ((barrierNumber == -2) and not WAVE_STATUS.PRIV): - SCC = Reg(0) - elif barrierNumber == 0: - SCC = Reg(0) - else: - SCC = Reg(BARRIER_STATE[barrierNumber & 63].signalCnt.u32 == 0) - BARRIER_STATE[barrierNumber & 63].signalCnt += 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U({ 9'0, BARRIER_STATE[barrierNumber & 63].signalCnt.u7, 5'0, BARRIER_STATE[barrierNumber & - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.u32 = ({ 0, BARRIER_STATE[barrierNumber & 63].signalCnt.u7, 0, BARRIER_STATE[barrierNumber] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # n = ReallocVgprs(32'I(S0[8 : 0].u32)); - # if n < 0 then - # SCC = 1'0U - # else - # NUM_VGPRS = n; - # SCC = 1'1U - # endif - S0 = Reg(s0) - SCC = Reg(scc) - # --- compiled pseudocode --- - n = ReallocVgprs((S0[8 : 0].u32)) - if n < 0: - SCC = Reg(0) - else: - NUM_VGPRS = n - SCC = Reg(1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S0[6:0] determines the sleep duration. The wave sleeps for (64*(S0[6:0]-1) … 64*S0[6:0]) clocks. The exact - S0 = Reg(s0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0[31 : 16].f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_HI_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} SOP1Op_FUNCTIONS = { SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, @@ -1445,10 +513,6 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64, SOP1Op.S_SENDMSG_RTN_B32: _SOP1Op_S_SENDMSG_RTN_B32, SOP1Op.S_SENDMSG_RTN_B64: _SOP1Op_S_SENDMSG_RTN_B64, - SOP1Op.S_BARRIER_SIGNAL: _SOP1Op_S_BARRIER_SIGNAL, - SOP1Op.S_BARRIER_SIGNAL_ISFIRST: _SOP1Op_S_BARRIER_SIGNAL_ISFIRST, - SOP1Op.S_GET_BARRIER_STATE: _SOP1Op_S_GET_BARRIER_STATE, - SOP1Op.S_ALLOC_VGPR: _SOP1Op_S_ALLOC_VGPR, SOP1Op.S_SLEEP_VAR: _SOP1Op_S_SLEEP_VAR, SOP1Op.S_CEIL_F32: _SOP1Op_S_CEIL_F32, SOP1Op.S_FLOOR_F32: _SOP1Op_S_FLOOR_F32, @@ -1467,802 +531,282 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16, } -def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 + S1.i32; - # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_CO_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 + S1.i32) SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 - S1.i32; - # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_CO_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 - S1.i32) SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - SCC.u32; - # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - SCC.u32) SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32; - # if D0.i32 < 0 then - # D0.i32 = -D0.i32 - # endif; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ABSDIFF_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 if D0.i32 < 0: D0.i32 = -D0.i32 SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 << S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 << S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 >> S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 >> S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 >> S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 >> S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 1) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 2) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 3) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 4) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_NOT1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_NOT1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_NOT1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_NOT1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); - # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 * S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 * S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = SCC ? S0.u64 : S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -2275,31 +819,9 @@ def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -2312,114 +834,45 @@ def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _SOP2Op_S_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _SOP2Op_S_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -2432,31 +885,9 @@ def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -2469,55 +900,17 @@ def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S1.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MINIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f32)): @@ -2532,33 +925,9 @@ def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S1.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAXIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f32)): @@ -2573,33 +942,9 @@ def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S1.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MINIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f16)): @@ -2614,33 +959,9 @@ def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S1.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAXIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f16)): @@ -2655,45 +976,19 @@ def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = S0.u64 + S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_NC_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = S0.u64 + S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = S0.u64 - S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_NC_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = S0.u64 - S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_MUL_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = S0.u64 * S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = S0.u64 * S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} SOP2Op_FUNCTIONS = { SOP2Op.S_ADD_CO_U32: _SOP2Op_S_ADD_CO_U32, @@ -2772,523 +1067,189 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_MUL_U64: _SOP2Op_S_MUL_U64, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 == S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 != S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 > S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 <= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 == S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 != S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 > S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 <= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 == S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 != S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg((isNAN(F(S0.f32)) or isNAN(F(S1.f32)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg((isNAN(F(S0.f16)) or isNAN(F(S1.f16)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 >= S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 >= S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 != S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 != S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 > S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 > S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 <= S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 <= S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 == S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 == S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 < S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 < S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, @@ -3339,79 +1300,34 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOPKOp_S_MOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Do nothing - for use by tools only - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result +def _SOPKOp_S_VERSION(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.i32 = 32'I(signext(S0.i16)) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # D0.i32 = D0.i32 + 32'I(signext(S0.i16)); - # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31])); - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOPKOp_S_ADDK_CO_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) D0.i32 = D0.i32 + (signext(S0.i16)) SCC = Reg(((tmp[31] == S0.i16[15]) and (tmp[31] != D0.i32[31]))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = D0.i32 * 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOPKOp_S_MULK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = D0.i32 * (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL; - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - D0 = Reg(d0) +def _SOPKOp_S_CALL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- D0.i64 = PC + 4 PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, @@ -3422,194 +1338,85 @@ SOPKOp_FUNCTIONS = { SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for i in 0U : SIMM16.u16[3 : 0].u32 do - # endfor +def _SOPPOp_S_NOP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): pass - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # instruction may be omitted. For wave64 the compiler may not know the status of the EXEC mask and hence - # // 1 cycle delay here - # // 2 cycles delay here - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result +def _SOPPOp_S_DELAY_ALU(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // PC passed into trap handler points to S_TRAP itself, - # PC = TBA.i64; - # // trap base address - PC = Reg(pc) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result +def _SOPPOp_S_TRAP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {'PC': PC} -def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // barrierBit 0: reserved - # // barrierBit 1: workgroup - # // barrierBit 2: trap - # // Implemented as a power-saving idle - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result +def _SOPPOp_S_BARRIER_WAIT(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; +def _SOPPOp_S_BRANCH(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 1 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 0 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, @@ -3625,40 +1432,11 @@ SOPPOp_FUNCTIONS = { SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE64 then - # // 64 lanes - # if EXEC == 0x0LL then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b64(EXEC)); - # // Lowest active lane - # endif - # else - # // 32 lanes - # if EXEC_LO.i32 == 0 then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); - # // Lowest active lane - # endif - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP1Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) EXEC_LO = SliceProxy(EXEC, 31, 0) # --- compiled pseudocode --- @@ -3673,935 +1451,368 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = S0.b16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = S0.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # addr = SRC0.u32; - # // Raw value from instruction - # D0.b32 = VGPR[laneId][addr].b32 - D0 = Reg(d0) - laneId = lane +def _VOP1Op_V_MOVRELS_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 16'0; - # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); - # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); - # D0.b16 = tmp.b16 - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) D0.b16 = tmp.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b32; - # D0.b32 = S0.b32; - # S0.b32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.b32) D0.b32 = S0.b32 S0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b16; - # D0.b16 = S0.b16; - # S0.b16 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.b16) D0.b16 = S0.b16 S0.b16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = ~S0.u16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ~S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { 16'0, S0.u16 } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(0, S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if OPSEL[1 : 0].u2 == 2'0U then - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) - # elsif OPSEL[1 : 0].u2 == 2'2U then - # // Byte select bits are reversed - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].fp8) - # elsif OPSEL[1 : 0].u2 == 2'1U then - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) - # else - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) - # endif - D0 = Reg(d0) - laneId = lane +def _VOP1Op_V_CVT_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if OPSEL[1 : 0].u2 == 0: @@ -4612,23 +1823,9 @@ def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) else: D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if OPSEL[1 : 0].u2 == 2'0U then - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) - # elsif OPSEL[1 : 0].u2 == 2'2U then - # // Byte select bits are reversed - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].bf8) - # elsif OPSEL[1 : 0].u2 == 2'1U then - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) - # else - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) - # endif - D0 = Reg(d0) - laneId = lane +def _VOP1Op_V_CVT_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if OPSEL[1 : 0].u2 == 0: @@ -4639,41 +1836,23 @@ def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) else: D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; - # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); - # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - D0 = Reg(d0) - tmp = Reg(0) - laneId = lane +def _VOP1Op_V_CVT_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; - # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); - # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - D0 = Reg(d0) - tmp = Reg(0) - laneId = lane +def _VOP1Op_V_CVT_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP1Op_FUNCTIONS = { VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, @@ -4760,172 +1939,58 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_PK_F32_BF8: _VOP1Op_V_CVT_PK_F32_BF8, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(S0.f64) && isNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && - # !sign(S1.f64))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_NUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if (isNAN(S0.f64) and isNAN(S1.f64)): @@ -4938,32 +2003,9 @@ def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(S0.f64) && isNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && - # sign(S1.f64))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_NUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if (isNAN(S0.f64) and isNAN(S1.f64)): @@ -4976,76 +2018,25 @@ def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -5058,31 +2049,9 @@ def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -5095,263 +2064,95 @@ def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 << S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 << S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -5364,31 +2165,9 @@ def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -5401,112 +2180,48 @@ def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16); - # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP2Op_FUNCTIONS = { VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32, @@ -5560,1671 +2275,319 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, } -def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -7238,54 +2601,9 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -7299,54 +2617,9 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -7360,1091 +2633,321 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 < S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f64 == S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 > S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <> S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 >= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 < S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i16 == S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 > S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <> S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 >= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 < S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u16 == S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 > S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <> S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 >= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 < S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i64 == S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 > S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <> S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 >= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 < S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 > S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 >= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -8458,46 +2961,9 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -8511,46 +2977,9 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -8564,45 +2993,13 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE64 then - # // 64 lanes - # if EXEC == 0x0LL then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b64(EXEC)); - # // Lowest active lane - # endif - # else - # // 32 lanes - # if EXEC_LO.i32 == 0 then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); - # // Lowest active lane - # endif - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP3Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) EXEC_LO = SliceProxy(EXEC, 31, 0) # --- compiled pseudocode --- @@ -8617,905 +3014,356 @@ def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = S0.b16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MOV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = S0.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # addr = SRC0.u32; - # // Raw value from instruction - # D0.b32 = VGPR[laneId][addr].b32 - D0 = Reg(d0) - laneId = lane +def _VOP3Op_V_MOVRELS_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 16'0; - # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); - # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); - # D0.b16 = tmp.b16 - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) D0.b16 = tmp.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = ~S0.u16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_NOT_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ~S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { 16'0, S0.u16 } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(0, S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if OPSEL[1 : 0].u2 == 2'0U then - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) - # elsif OPSEL[1 : 0].u2 == 2'2U then - # // Byte select bits are reversed - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].fp8) - # elsif OPSEL[1 : 0].u2 == 2'1U then - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) - # else - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) - # endif - D0 = Reg(d0) - laneId = lane +def _VOP3Op_V_CVT_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if OPSEL[1 : 0].u2 == 0: @@ -9526,23 +3374,9 @@ def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) else: D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if OPSEL[1 : 0].u2 == 2'0U then - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) - # elsif OPSEL[1 : 0].u2 == 2'2U then - # // Byte select bits are reversed - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].bf8) - # elsif OPSEL[1 : 0].u2 == 2'1U then - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) - # else - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) - # endif - D0 = Reg(d0) - laneId = lane +def _VOP3Op_V_CVT_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if OPSEL[1 : 0].u2 == 0: @@ -9553,208 +3387,76 @@ def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) else: D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; - # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); - # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - D0 = Reg(d0) - tmp = Reg(0) - laneId = lane +def _VOP3Op_V_CVT_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; - # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); - # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - D0 = Reg(d0) - tmp = Reg(0) - laneId = lane +def _VOP3Op_V_CVT_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(S0.f64) && isNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && - # !sign(S1.f64))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_NUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if (isNAN(S0.f64) and isNAN(S1.f64)): @@ -9767,32 +3469,9 @@ def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(S0.f64) && isNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && - # sign(S1.f64))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_NUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if (isNAN(S0.f64) and isNAN(S1.f64)): @@ -9805,76 +3484,25 @@ def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -9887,31 +3515,9 @@ def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -9924,179 +3530,65 @@ def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 << S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 << S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -10109,31 +3601,9 @@ def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -10146,150 +3616,48 @@ def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = S2.f32 - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = S2.f32 else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = 5.0F - # else - # D0.f32 = 4.0F - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = 3.0F - # else - # D0.f32 = 2.0F - # endif - # else - # if S0.f32 < 0.0F then - # D0.f32 = 1.0F - # else - # D0.f32 = 0.0F - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBEID_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = 5.0 @@ -10305,36 +3673,9 @@ def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = 1.0 else: D0.f32 = 0.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap S coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = -S0.f32 - # else - # D0.f32 = S0.f32 - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S0.f32 - # else - # if S0.f32 < 0.0F then - # D0.f32 = S2.f32 - # else - # D0.f32 = -S2.f32 - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBESC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = -S0.f32 @@ -10347,32 +3688,9 @@ def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = S2.f32 else: D0.f32 = -S2.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap T coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = -S1.f32 - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = -S2.f32 - # else - # D0.f32 = S2.f32 - # endif - # else - # D0.f32 = -S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBETC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = -S1.f32 elif abs(S1.f32) >= abs(S0.f32): @@ -10382,377 +3700,128 @@ def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = S2.f32 else: D0.f32 = -S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = 2.0 * cubemap major axis. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = S2.f32 * 2.0F - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S1.f32 * 2.0F - # else - # D0.f32 = S0.f32 * 2.0F - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBEMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = S2.f32 * 2.0 elif abs(S1.f32) >= abs(S0.f32): D0.f32 = S1.f32 * 2.0 else: D0.f32 = S0.f32 * 2.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFI_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); - # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); - # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); - # tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1U); - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_LERP_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24)) tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16) tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8) tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ALIGNBIT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0]) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ALIGNBYTE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0] * 8)) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || - # isNAN(64'F(S2.f32))) then - # D0.f32 = -MAX_FLOAT_F32 - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MULLIT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S1.f32 == -MAX_FLOAT_F32) or (F(S1.f32) == (-INF)) or isNAN(F(S1.f32)) or (S2.f32 <= 0.0) or isNAN(F(S2.f32))): D0.f32 = -MAX_FLOAT_F32 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then - # D0.i32 = v_max_i32(S1.i32, S2.i32) - # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then - # D0.i32 = v_max_i32(S0.i32, S2.i32) - # else - # D0.i32 = v_max_i32(S0.i32, S1.i32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32: D0.i32 = v_max_i32(S1.i32, S2.i32) elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32: D0.i32 = v_max_i32(S0.i32, S2.i32) else: D0.i32 = v_max_i32(S0.i32, S1.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then - # D0.u32 = v_max_u32(S1.u32, S2.u32) - # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then - # D0.u32 = v_max_u32(S0.u32, S2.u32) - # else - # D0.u32 = v_max_u32(S0.u32, S1.u32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32: D0.u32 = v_max_u32(S1.u32, S2.u32) elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32: D0.u32 = v_max_u32(S0.u32, S2.u32) else: D0.u32 = v_max_u32(S0.u32, S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])) tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])) tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])) tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_HI_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((v_sad_u8(S0, S1, 0)) << 16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); - # tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16) tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); - # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_PK_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8))))) tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f32) ^ sign(S2.f32)); - # if isNAN(64'F(S2.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif ((64'F(S1.f32) == 0.0) && (64'F(S2.f32) == 0.0)) then - # // 0/0 - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(abs(S1.f32)) == +INF) && (64'F(abs(S2.f32)) == +INF)) then - # // inf/inf - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(S1.f32) == 0.0) || (64'F(abs(S2.f32)) == +INF)) then - # // x/0, or inf/y - # D0.f32 = sign_out ? -INF.f32 : +INF.f32 - # elsif ((64'F(abs(S1.f32)) == +INF) || (64'F(S2.f32) == 0.0)) then - # // x/inf, 0/y - # D0.f32 = sign_out ? -0.0F : 0.0F - # elsif exponent(S2.f32) - exponent(S1.f32) < -150 then - # D0.f32 = sign_out ? -UNDERFLOW_F32 : UNDERFLOW_F32 - # elsif exponent(S1.f32) == 255 then - # D0.f32 = sign_out ? -OVERFLOW_F32 : OVERFLOW_F32 - # else - # D0.f32 = sign_out ? -abs(S0.f32) : abs(S0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f32) ^ sign(S2.f32)) if isNAN(F(S2.f32)): D0.f32 = F(cvtToQuietNAN(F(S2.f32))) @@ -10772,40 +3841,9 @@ def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) else: D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f64) ^ sign(S2.f64)); - # if isNAN(S2.f64) then - # D0.f64 = cvtToQuietNAN(S2.f64) - # elsif isNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif ((S1.f64 == 0.0) && (S2.f64 == 0.0)) then - # // 0/0 - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((abs(S1.f64) == +INF) && (abs(S2.f64) == +INF)) then - # // inf/inf - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((S1.f64 == 0.0) || (abs(S2.f64) == +INF)) then - # // x/0, or inf/y - # D0.f64 = sign_out ? -INF : +INF - # elsif ((abs(S1.f64) == +INF) || (S2.f64 == 0.0)) then - # // x/inf, 0/y - # D0.f64 = sign_out ? -0.0 : 0.0 - # elsif exponent(S2.f64) - exponent(S1.f64) < -1075 then - # D0.f64 = sign_out ? -UNDERFLOW_F64 : UNDERFLOW_F64 - # elsif exponent(S1.f64) == 2047 then - # D0.f64 = sign_out ? -OVERFLOW_F64 : OVERFLOW_F64 - # else - # D0.f64 = sign_out ? -abs(S0.f64) : abs(S0.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f64) ^ sign(S2.f64)) if isNAN(S2.f64): D0.f64 = cvtToQuietNAN(S2.f64) @@ -10825,122 +3863,41 @@ def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) else: D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_minimum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_minimum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_maximum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_maximum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then - # D0.f32 = v_min3_num_f32(S0.f32, S1.f32, S2.f32) - # elsif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then - # D0.f32 = v_max_num_f32(S1.f32, S2.f32) - # elsif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S1.f32 then - # D0.f32 = v_max_num_f32(S0.f32, S2.f32) - # else - # D0.f32 = v_max_num_f32(S0.f32, S1.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or isNAN(F(S2.f32))): D0.f32 = v_min3_num_f32(S0.f32, S1.f32, S2.f32) elif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S0.f32: @@ -10949,25 +3906,9 @@ def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0.f32 = v_max_num_f32(S0.f32, S2.f32) else: D0.f32 = v_max_num_f32(S0.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then - # D0.f16 = v_min3_num_f16(S0.f16, S1.f16, S2.f16) - # elsif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then - # D0.f16 = v_max_num_f16(S1.f16, S2.f16) - # elsif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S1.f16 then - # D0.f16 = v_max_num_f16(S0.f16, S2.f16) - # else - # D0.f16 = v_max_num_f16(S0.f16, S1.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))): D0.f16 = v_min3_num_f16(S0.f16, S1.f16, S2.f16) elif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S0.f16: @@ -10976,89 +3917,32 @@ def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0.f16 = v_max_num_f16(S0.f16, S2.f16) else: D0.f16 = v_max_num_f16(S0.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FMAS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f32 = (2.0 ** 64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32) else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) - # else - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FMAS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64) else: D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += S1.u32[15 : 8] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += S1.u32[23 : 16] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += S1.u32[31 : 24] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_MSAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])))) tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])))) tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])))) tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_QSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -11066,21 +3950,9 @@ def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp[31 : 16] = (v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -11088,21 +3960,9 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal tmp[31 : 16] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); - # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); - # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); - # tmp[31 : 0] = 32'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)); - # D0.b128 = tmp.b128 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_MQSAD_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[127 : 96] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)) @@ -11110,232 +3970,78 @@ def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp[63 : 32] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)) tmp[31 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)) D0.b128 = tmp.b128 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 + S2.u16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 + S2.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); - # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); - # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); - # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_PERM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHL_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_LSHL_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then - # D0.i16 = v_max_i16(S1.i16, S2.i16) - # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then - # D0.i16 = v_max_i16(S0.i16, S2.i16) - # else - # D0.i16 = v_max_i16(S0.i16, S1.i16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16: D0.i16 = v_max_i16(S1.i16, S2.i16) elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16: D0.i16 = v_max_i16(S0.i16, S2.i16) else: D0.i16 = v_max_i16(S0.i16, S1.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then - # D0.u16 = v_max_u16(S1.u16, S2.u16) - # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then - # D0.u16 = v_max_u16(S0.u16, S2.u16) - # else - # D0.u16 = v_max_u16(S0.u16, S1.u16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16: D0.u16 = v_max_u16(S1.u16, S2.u16) elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16: D0.u16 = v_max_u16(S0.u16, S2.u16) else: D0.u16 = v_max_u16(S0.u16, S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 * S1.i16 + S2.i16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 * S1.i16 + S2.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f16) ^ sign(S2.f16)); - # if isNAN(64'F(S2.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then - # // 0/0 - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then - # // inf/inf - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then - # // x/0, or inf/y - # D0.f16 = sign_out ? -INF.f16 : +INF.f16 - # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then - # // x/inf, 0/y - # D0.f16 = sign_out ? -16'0.0 : 16'0.0 - # else - # D0.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f16) ^ sign(S2.f16)) if isNAN(F(S2.f16)): D0.f16 = F(cvtToQuietNAN(F(S2.f16))) @@ -11351,739 +4057,280 @@ def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f16 = ((-0.0) if (sign_out) else (0.0)) else: D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHL_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32 | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32 | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u16) * (S1.u16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i16) * (S1.i16) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = VCC.u64[laneId] ? S1.u16 : S0.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CNDMASK_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S1.u16) if (VCC.u64[laneId]) else (S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f16; - # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; - # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; - # D0.f16 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_DOT2_F16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f16) tmp += S0[15 : 0].f16 * S1[15 : 0].f16 tmp += S0[31 : 16].f16 * S1[31 : 16].f16 D0.f16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.bf16; - # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; - # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; - # D0.bf16 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_DOT2_BF16_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.bf16) tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16 tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16 D0.bf16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_maximum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUMMAXIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_maximum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_minimum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUMMINIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_minimum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_maximum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUMMAXIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_maximum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_minimum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUMMINIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_minimum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16); - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16); - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16; - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16); - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16); - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 + S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 + S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 - S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 - S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); - # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16] = (v_cvt_i16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_i16_f32(S0.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); - # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16] = (v_cvt_u16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_u16_f32(S0.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 + S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 + S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 - S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 - S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 16].f16 = S1.f16; - # D0[15 : 0].f16 = S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_PACK_B32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 16].f16 = S1.f16 D0[15 : 0].f16 = S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); - # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f16_to_snorm(S0.f16) tmp[31 : 16].i16 = f16_to_snorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); - # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f16_to_unorm(S0.f16) tmp[31 : 16].u16 = f16_to_unorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * 2.0F ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32; - # for i in 0 : 31 do - # tmp += S0[i].u32; - # // count i'th bit - # endfor; - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_BCNT_U32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32) for i in range(0, int(31)+1): tmp += S0[i].u32 D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); - # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f32_to_snorm(S0.f32) tmp[31 : 16].i16 = f32_to_snorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); - # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f32_to_unorm(S0.f32) tmp[31 : 16].u16 = f32_to_unorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = u32_to_u16(S0.u32); - # tmp[31 : 16].u16 = u32_to_u16(S1.u32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_U16_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = u32_to_u16(S0.u32) tmp[31 : 16].u16 = u32_to_u16(S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = i32_to_i16(S0.i32); - # tmp[31 : 16].i16 = i32_to_i16(S1.i32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_I16_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = i32_to_i16(S0.i32) tmp[31 : 16].i16 = i32_to_i16(S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 + S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 + S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * 2.0 ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 * S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_LO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 * S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # shift = 32'I(S1[4 : 0].u32) * 53; - # if exponent(S0.f64) > 1077 then - # shift += exponent(S0.f64) - 1077 - # endif; - # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} - # // b_1200 is the MSB of the fractional part of 2.0/PI - # // Left shift operation indicates which bits are brought - # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); - # scale = -53 - shift; - # if exponent(S0.f64) >= 1968 then - # scale += 128 - # endif; - # D0.f64 = ldexp(result, scale) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRIG_PREOP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): shift = (S1[4 : 0].u32) * 53 if exponent(S0.f64) > 1077: shift += exponent(S0.f64) - 1077 @@ -12092,91 +4339,29 @@ def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if exponent(S0.f64) >= 1968: scale += 128 D0.f64 = ldexp(result, scale) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 << S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 << S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = (S1.i16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = (S1.i16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (S1.i64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (S1.i64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(S0.f64) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isSignalNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isQuietNAN(S0.f64) then - # D0.f64 = S0.f64 - # elsif isQuietNAN(S1.f64) then - # D0.f64 = S1.f64 - # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && - # !sign(S1.f64))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if isSignalNAN(S0.f64): @@ -12191,34 +4376,9 @@ def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(S0.f64) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isSignalNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isQuietNAN(S0.f64) then - # D0.f64 = S0.f64 - # elsif isQuietNAN(S1.f64) then - # D0.f64 = S1.f64 - # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && - # sign(S1.f64))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if isSignalNAN(S0.f64): @@ -12233,23 +4393,9 @@ def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE32 then - # lane = S1.u32[4 : 0].u32; - # // Lane select for wave32 - # else - # lane = S1.u32[5 : 0].u32; - # // Lane select for wave64 - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3Op_V_READLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if WAVE32: @@ -12257,66 +4403,21 @@ def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V else: lane = S1.u32[5 : 0].u32 D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 & S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 & S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 | S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 | S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 ^ S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 ^ S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S1.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f32)): @@ -12331,33 +4432,9 @@ def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S1.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f32)): @@ -12372,33 +4449,9 @@ def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S1.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f16)): @@ -12413,33 +4466,9 @@ def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S1.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f16)): @@ -12454,9 +4483,7 @@ def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3Op_FUNCTIONS = { VOP3Op.V_CMP_LT_F16: _VOP3Op_V_CMP_LT_F16, @@ -12878,102 +4905,26 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MAXIMUM_F16: _VOP3Op_V_MAXIMUM_F16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # D0.f32 = NAN.f32 - # elsif exponent(S2.f32) - exponent(S1.f32) >= 96 then - # // N/D near MAX_FLOAT_F32 - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif S1.f32 == DENORM.f32 then - # D0.f32 = ldexp(S0.f32, 64) - # elsif ((1.0 / 64'F(S1.f32) == DENORM.f64) && (S2.f32 / S1.f32 == DENORM.f32)) then - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif 1.0 / 64'F(S1.f32) == DENORM.f64 then - # D0.f32 = ldexp(S0.f32, -64) - # elsif S2.f32 / S1.f32 == DENORM.f32 then - # VCC = 0x1LL; - # if S0.f32 == S2.f32 then - # // Only scale the numerator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif exponent(S2.f32) <= 23 then - # // Numerator is tiny - # D0.f32 = ldexp(S0.f32, 64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)): @@ -12996,47 +4947,10 @@ def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64) if S1.f32 == DENORM.f32: D0.f32 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then - # D0.f64 = NAN.f64 - # elsif exponent(S2.f64) - exponent(S1.f64) >= 768 then - # // N/D near MAX_FLOAT_F64 - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, 128) - # elsif ((1.0 / S1.f64 == DENORM.f64) && (S2.f64 / S1.f64 == DENORM.f64)) then - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif 1.0 / S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, -128) - # elsif S2.f64 / S1.f64 == DENORM.f64 then - # VCC = 0x1LL; - # if S0.f64 == S2.f64 then - # // Only scale the numerator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif exponent(S2.f64) <= 53 then - # // Numerator is tiny - # D0.f64 = ldexp(S0.f64, 128) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3SDOp_V_DIV_SCALE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((S2.f64 == 0.0) or (S1.f64 == 0.0)): @@ -13059,105 +4973,41 @@ def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal D0.f64 = ldexp(S0.f64, 128) if S1.f64 == DENORM.f64: D0.f64 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3SDOp_V_MAD_CO_U64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.u32) * (S1.u32) + (S2.u64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3SDOp_V_MAD_CO_I64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.i32) * (S1.i32) + (S2.i64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32; - # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32) VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_ADD_CO_CI_U32: _VOP3SDOp_V_ADD_CO_CI_U32, @@ -13172,353 +5022,159 @@ VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32) tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); - # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; - # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; - # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); - # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); - # tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8); - # tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8) tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8) tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8) tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); - # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); - # tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4); - # tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4); - # tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4); - # tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4); - # tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4); - # tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4) tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4) @@ -13529,188 +5185,82 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4) tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); - # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16) tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_min_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_min_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_min_num_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_min_num_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_max_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_max_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_max_num_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_max_num_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_minimum_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_minimum_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MINIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_minimum_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_minimum_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_maximum_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_maximum_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAXIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_maximum_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_maximum_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].bf8); - # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].bf8); - # tmp += 32'F(S0[23 : 16].fp8) * 32'F(S1[23 : 16].bf8); - # tmp += 32'F(S0[31 : 24].fp8) * 32'F(S1[31 : 24].bf8); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_F32_FP8_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += F(S0[7 : 0].fp8) * F(S1[7 : 0].bf8) tmp += F(S0[15 : 8].fp8) * F(S1[15 : 8].bf8) tmp += F(S0[23 : 16].fp8) * F(S1[23 : 16].bf8) tmp += F(S0[31 : 24].fp8) * F(S1[31 : 24].bf8) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += 32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].fp8); - # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].fp8); - # tmp += 32'F(S0[23 : 16].bf8) * 32'F(S1[23 : 16].fp8); - # tmp += 32'F(S0[31 : 24].bf8) * 32'F(S1[31 : 24].fp8); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_F32_BF8_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += F(S0[7 : 0].bf8) * F(S1[7 : 0].fp8) tmp += F(S0[15 : 8].bf8) * F(S1[15 : 8].fp8) tmp += F(S0[23 : 16].bf8) * F(S1[23 : 16].fp8) tmp += F(S0[31 : 24].bf8) * F(S1[31 : 24].fp8) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].fp8); - # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].fp8); - # tmp += 32'F(S0[23 : 16].fp8) * 32'F(S1[23 : 16].fp8); - # tmp += 32'F(S0[31 : 24].fp8) * 32'F(S1[31 : 24].fp8); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_F32_FP8_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += F(S0[7 : 0].fp8) * F(S1[7 : 0].fp8) tmp += F(S0[15 : 8].fp8) * F(S1[15 : 8].fp8) tmp += F(S0[23 : 16].fp8) * F(S1[23 : 16].fp8) tmp += F(S0[31 : 24].fp8) * F(S1[31 : 24].fp8) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_F32_BF8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += 32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].bf8); - # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].bf8); - # tmp += 32'F(S0[23 : 16].bf8) * 32'F(S1[23 : 16].bf8); - # tmp += 32'F(S0[31 : 24].bf8) * 32'F(S1[31 : 24].bf8); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_F32_BF8_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += F(S0[7 : 0].bf8) * F(S1[7 : 0].bf8) tmp += F(S0[15 : 8].bf8) * F(S1[15 : 8].bf8) tmp += F(S0[23 : 16].bf8) * F(S1[23 : 16].bf8) tmp += F(S0[31 : 24].bf8) * F(S1[31 : 24].bf8) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, @@ -13744,1671 +5294,319 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_DOT4_F32_BF8_BF8: _VOP3POp_V_DOT4_F32_BF8_BF8, } -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -15422,54 +5620,9 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -15483,54 +5636,9 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -15544,1091 +5652,321 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 < S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f64 == S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 > S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <> S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 >= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 < S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i16 == S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 > S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <> S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 >= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 < S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u16 == S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 > S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <> S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 >= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 < S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i64 == S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 > S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <> S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 >= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 < S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 > S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 >= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -16642,46 +5980,9 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -16695,46 +5996,9 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -16748,10 +6012,7 @@ def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} VOPCOp_FUNCTIONS = { VOPCOp.V_CMP_LT_F16: _VOPCOp_V_CMP_LT_F16, diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py index b24e904264..4e916604b4 100644 --- a/extra/assembly/amd/emu.py +++ b/extra/assembly/amd/emu.py @@ -1,8 +1,9 @@ # RDNA3 emulator - executes compiled pseudocode from AMD ISA PDF # mypy: ignore-errors from __future__ import annotations -import ctypes, struct -from extra.assembly.amd.dsl import Inst, RawImm, unwrap, FLOAT_ENC, MASK32, MASK64, _f32, _i32, _sext, _f16, _i16, _f64, _i64 +import ctypes +from extra.assembly.amd.dsl import Inst, unwrap, FLOAT_ENC, MASK32, MASK64, _f32, _i32, _sext, _f16, _i16, _f64, _i64 +from extra.assembly.amd.pcode import Reg from extra.assembly.amd.asm import detect_format from extra.assembly.amd.autogen.rdna3.gen_pcode import get_compiled_functions from extra.assembly.amd.autogen.rdna3.ins import (SOP1, SOP2, SOPC, SOPK, SOPP, SMEM, VOP1, VOP2, VOP3, VOP3SD, VOP3P, VOPC, DS, FLAT, VOPD, @@ -178,24 +179,21 @@ def exec_scalar(st: WaveState, inst: Inst) -> int: s0 = st.rsrc64(ssrc0, 0) if inst.is_src_64(0) else (st.rsrc(ssrc0, 0) if not isinstance(inst, (SOPK, SOPP)) else (st.rsgpr(inst.sdst) if isinstance(inst, SOPK) else 0)) s1 = st.rsrc64(inst.ssrc1, 0) if inst.is_src_64(1) else (st.rsrc(inst.ssrc1, 0) if isinstance(inst, (SOP2, SOPC)) else inst.simm16 if isinstance(inst, SOPK) else 0) d0 = st.rsgpr64(sdst) if inst.dst_regs() == 2 and sdst is not None else (st.rsgpr(sdst) if sdst is not None else 0) - exec_mask = st.exec_mask literal = inst.simm16 if isinstance(inst, (SOPK, SOPP)) else st.literal - # Execute compiled function - pass PC in bytes for instructions that need it - # For wave32, mask VCC and EXEC to 32 bits since only the lower 32 bits are relevant - pc_bytes = st.pc * 4 - vcc32, exec32 = st.vcc & MASK32, exec_mask & MASK32 - result = fn(s0, s1, 0, d0, st.scc, vcc32, 0, exec32, literal, None, {}, pc=pc_bytes) + # Create Reg objects for compiled function - mask VCC/EXEC to 32 bits for wave32 + result = fn(Reg(s0), Reg(s1), None, Reg(d0), Reg(st.scc), Reg(st.vcc & MASK32), 0, Reg(st.exec_mask & MASK32), literal, None, PC=Reg(st.pc * 4)) - # Apply results - if sdst is not None: - (st.wsgpr64 if result.get('d0_64') else st.wsgpr)(sdst, result['d0']) - if 'scc' in result: st.scc = result['scc'] - if 'exec' in result: st.exec_mask = result['exec'] - if 'new_pc' in result: + # Apply results - extract values from returned Reg objects + if sdst is not None and 'D0' in result: + (st.wsgpr64 if inst.dst_regs() == 2 else st.wsgpr)(sdst, result['D0']._val) + if 'SCC' in result: st.scc = result['SCC']._val & 1 + if 'EXEC' in result: st.exec_mask = result['EXEC']._val + if 'PC' in result: # Convert absolute byte address to word delta - # new_pc is where we want to go, st.pc is current position, inst._words will be added after - new_pc_words = result['new_pc'] // 4 + pc_val = result['PC']._val + new_pc = pc_val if pc_val < 0x8000000000000000 else pc_val - 0x10000000000000000 + new_pc_words = new_pc // 4 return new_pc_words - st.pc - 1 # -1 because emulator adds inst_words (1 for scalar) return 0 @@ -260,24 +258,25 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No vdsty = (inst.vdsty << 1) | ((inst.vdstx & 1) ^ 1) inputs = [(inst.opx, st.rsrc(inst.srcx0, lane), V[inst.vsrcx1], V[inst.vdstx], inst.vdstx), (inst.opy, st.rsrc(inst.srcy0, lane), V[inst.vsrcy1], V[vdsty], vdsty)] - results = [(dst, fn(s0, s1, 0, d0, st.scc, st.vcc, lane, st.exec_mask, st.literal, None, {})['d0']) - for vopd_op, s0, s1, d0, dst in inputs if (op := _VOPD_TO_VOP.get(vopd_op)) and (fn := compiled.get(type(op), {}).get(op))] - for dst, val in results: V[dst] = val + def exec_vopd(vopd_op, s0, s1, d0): + op = _VOPD_TO_VOP[vopd_op] + return compiled[type(op)][op](Reg(s0), Reg(s1), None, Reg(d0), Reg(st.scc), Reg(st.vcc), lane, Reg(st.exec_mask), st.literal, None)['D0']._val + for vopd_op, s0, s1, d0, dst in inputs: V[dst] = exec_vopd(vopd_op, s0, s1, d0) return # VOP3SD: has extra scalar dest for carry output if isinstance(inst, VOP3SD): - fn = compiled.get(VOP3SDOp, {}).get(inst.op) - if fn is None: raise NotImplementedError(f"{inst.op.name} not in pseudocode") + fn = compiled[VOP3SDOp][inst.op] # Read sources based on register counts from inst properties def rsrc_n(src, regs): return st.rsrc64(src, lane) if regs == 2 else st.rsrc(src, lane) s0, s1, s2 = rsrc_n(inst.src0, inst.src_regs(0)), rsrc_n(inst.src1, inst.src_regs(1)), rsrc_n(inst.src2, inst.src_regs(2)) # Carry-in ops use src2 as carry bitmask instead of VCC vcc = st.rsgpr64(inst.src2) if 'CO_CI' in inst.op_name else st.vcc - result = fn(s0, s1, s2, V[inst.vdst], st.scc, vcc, lane, st.exec_mask, st.literal, None, {}) - V[inst.vdst] = result['d0'] & MASK32 - if result.get('d0_64'): V[inst.vdst + 1] = (result['d0'] >> 32) & MASK32 - if result.get('vcc_lane') is not None: st.pend_sgpr_lane(inst.sdst, lane, result['vcc_lane']) + result = fn(Reg(s0), Reg(s1), Reg(s2), Reg(V[inst.vdst]), Reg(st.scc), Reg(vcc), lane, Reg(st.exec_mask), st.literal, None) + d0_val = result['D0']._val + V[inst.vdst] = d0_val & MASK32 + if inst.dst_regs() == 2: V[inst.vdst + 1] = (d0_val >> 32) & MASK32 + if 'VCC' in result: st.pend_sgpr_lane(inst.sdst, lane, (result['VCC']._val >> lane) & 1) return # Get op enum and sources (None means "no source" for that operand) @@ -317,8 +316,7 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No if abs_ & (1<= 256 else (src0 if src0 is not None else 0) - result = fn(s0, s1, s2, d0, st.scc, vcc_for_fn, lane, st.exec_mask, st.literal, st.vgpr, {}, src0_idx, vdst) + result = fn(Reg(s0), Reg(s1), Reg(s2), Reg(d0), Reg(st.scc), Reg(vcc_for_fn), lane, Reg(st.exec_mask), st.literal, st.vgpr, src0_idx, vdst) - # Apply results + # Apply results - extract values from returned Reg objects if 'vgpr_write' in result: # Lane instruction wrote to VGPR: (lane, vgpr_idx, value) wr_lane, wr_idx, wr_val = result['vgpr_write'] st.vgpr[wr_lane][wr_idx] = wr_val - if 'vcc_lane' in result: + if 'VCC' in result: # VOP2 carry ops write to VCC implicitly; VOPC/VOP3 write to vdst - st.pend_sgpr_lane(VCC_LO if isinstance(inst, VOP2) and 'CO_CI' in inst.op_name else vdst, lane, result['vcc_lane']) - if 'exec_lane' in result: - # V_CMPX instructions write to EXEC per-lane - st.pend_sgpr_lane(EXEC_LO, lane, result['exec_lane']) - if 'd0' in result and op_cls is not VOPCOp and 'vgpr_write' not in result: + st.pend_sgpr_lane(VCC_LO if isinstance(inst, VOP2) and 'CO_CI' in inst.op_name else vdst, lane, (result['VCC']._val >> lane) & 1) + if 'EXEC' in result: + # V_CMPX instructions write to EXEC per-lane (not to vdst) + st.pend_sgpr_lane(EXEC_LO, lane, (result['EXEC']._val >> lane) & 1) + elif op_cls is VOPCOp: + # VOPC comparison result stored in D0 bitmask, extract lane bit (non-CMPX only) + st.pend_sgpr_lane(vdst, lane, (result['D0']._val >> lane) & 1) + if op_cls is not VOPCOp and 'vgpr_write' not in result: writes_to_sgpr = 'READFIRSTLANE' in inst.op_name or 'READLANE' in inst.op_name - d0_val = result['d0'] + d0_val = result['D0']._val if writes_to_sgpr: st.wsgpr(vdst, d0_val & MASK32) - elif result.get('d0_64'): V[vdst], V[vdst + 1] = d0_val & MASK32, (d0_val >> 32) & MASK32 + elif inst.dst_regs() == 2: V[vdst], V[vdst + 1] = d0_val & MASK32, (d0_val >> 32) & MASK32 elif inst.is_dst_16(): V[vdst] = _dst16(V[vdst], d0_val, bool(opsel & 8) if isinstance(inst, VOP3) else dst_hi) else: V[vdst] = d0_val & MASK32 diff --git a/extra/assembly/amd/pdf.py b/extra/assembly/amd/pdf.py index a7a91a2166..abd35022cd 100644 --- a/extra/assembly/amd/pdf.py +++ b/extra/assembly/amd/pdf.py @@ -43,7 +43,10 @@ UNSUPPORTED = ['SGPR[', 'V_SWAP', 'eval ', 'FATAL_HALT', 'HW_REGISTERS', 'vscnt', 'vmcnt', 'expcnt', 'lgkmcnt', 'CVT_OFF_TABLE', 'ThreadMask', 'S1[i', 'C.i32', 'S[i]', 'in[', - 'if n.', 'DST.u32', 'addrd = DST', 'addr = DST'] # Malformed pseudocode from PDF + 'if n.', 'DST.u32', 'addrd = DST', 'addr = DST', + 'BARRIER_STATE', 'ReallocVgprs', + 'GPR_IDX', 'VSKIP', 'specified in', 'TTBL', + 'fp6', 'bf6'] # Malformed pseudocode from PDF # ═══════════════════════════════════════════════════════════════════════════════ # COMPILER: pseudocode -> Python (minimal transforms) @@ -51,6 +54,7 @@ UNSUPPORTED = ['SGPR[', 'V_SWAP', 'eval ', 'FATAL_HALT', 'HW_REGISTERS', def compile_pseudocode(pseudocode: str) -> str: """Compile pseudocode to Python. Transforms are minimal - most syntax just works.""" + pseudocode = re.sub(r'\bpass\b', 'pass_', pseudocode) # 'pass' is Python keyword raw_lines = pseudocode.strip().split('\n') joined_lines: list[str] = [] for line in raw_lines: @@ -113,7 +117,7 @@ def compile_pseudocode(pseudocode: str) -> str: break else: lhs, rhs = line.split('=', 1) - lhs_s, rhs_s = lhs.strip(), rhs.strip() + lhs_s, rhs_s = _expr(lhs.strip()), rhs.strip() stmt = _assign(lhs_s, _expr(rhs_s)) if in_first_match_loop and rhs_s == 'i' and (lhs_s == 'tmp' or lhs_s == 'D0.i32'): stmt += "; break" @@ -533,52 +537,57 @@ def _apply_pseudocode_fixes(op, code: str) -> str: def _generate_function(cls_name: str, op, pc: str, code: str) -> tuple[str, str]: """Generate a single compiled pseudocode function.""" - is_64 = any(p in pc for p in ['D0.u64', 'D0.b64', 'D0.f64', 'D0.i64', 'D1.u64', 'D1.b64', 'D1.f64', 'D1.i64']) has_d1 = '{ D1' in pc - if has_d1: is_64 = True - is_cmp = (cls_name in ('VOPCOp', 'VOP3Op')) and 'D0.u64[laneId]' in pc is_cmpx = (cls_name in ('VOPCOp', 'VOP3Op')) and 'EXEC.u64[laneId]' in pc is_div_scale = 'DIV_SCALE' in op.name has_sdst = cls_name == 'VOP3SDOp' and ('VCC.u64[laneId]' in pc or is_div_scale) - has_pc = 'PC' in pc combined = code + pc fn_name = f"_{cls_name}_{op.name}" - lines = [f"def {fn_name}(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):"] - for pc_line in pc.split('\n'): lines.append(f" # {pc_line}") + # Function accepts Reg objects directly (uppercase names), laneId is passed directly as int + lines = [f"def {fn_name}(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):"] - regs = [('S0', 'Reg(s0)'), ('S1', 'Reg(s1)'), ('S2', 'Reg(s2)'), - ('D0', 'Reg(s0)' if is_div_scale else 'Reg(d0)'), ('D1', 'Reg(0)'), - ('SCC', 'Reg(scc)'), ('VCC', 'Reg(vcc)'), ('EXEC', 'Reg(exec_mask)'), - ('tmp', 'Reg(0)'), ('saveexec', 'Reg(exec_mask)'), ('laneId', 'lane'), - ('SIMM16', 'Reg(literal)'), ('SIMM32', 'Reg(literal)'), - ('SRC0', 'Reg(src0_idx)'), ('VDST', 'Reg(vdst_idx)'), ('PC', 'Reg(pc)')] - used = {name for name, _ in regs if name in combined} - if 'EXEC_LO' in combined or 'EXEC_HI' in combined: used.add('EXEC') - if 'VCCZ' in combined: used.add('VCC') - if 'EXECZ' in combined: used.add('EXEC') - for name, init in regs: - if name in used: lines.append(f" {name} = {init}") - if 'EXEC_LO' in combined: lines.append(" EXEC_LO = SliceProxy(EXEC, 31, 0)") - if 'EXEC_HI' in combined: lines.append(" EXEC_HI = SliceProxy(EXEC, 63, 32)") - if 'VCCZ' in combined: lines.append(" VCCZ = Reg(1 if VCC._val == 0 else 0)") - if 'EXECZ' in combined: lines.append(" EXECZ = Reg(1 if EXEC._val == 0 else 0)") - lines.append(" # --- compiled pseudocode ---") - for line in code.split('\n'): lines.append(f" {line}") - lines.append(" # --- end pseudocode ---") - d0_val, scc_val = ("D0._val" if 'D0' in used else "d0"), ("SCC._val & 1" if 'SCC' in used else "scc & 1") - lines.append(f" result = {{'d0': {d0_val}, 'scc': {scc_val}}}") - if has_sdst: lines.append(" result['vcc_lane'] = (VCC._val >> lane) & 1") - elif 'VCC' in used: lines.append(" if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1") - if is_cmpx: lines.append(" result['exec_lane'] = (EXEC._val >> lane) & 1") - elif 'EXEC' in used: lines.append(" if EXEC._val != exec_mask: result['exec'] = EXEC._val") - if is_cmp: lines.append(" result['vcc_lane'] = (D0._val >> lane) & 1") - if is_64: lines.append(" result['d0_64'] = True") - if has_d1: lines.append(" result['d1'] = D1._val & 1") - if has_pc: - lines.append(" _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000") - lines.append(" result['new_pc'] = _pc") - lines.append(" return result\n") + # Registers that need special handling (not passed directly) + # Only init if used but not first assigned as `name = Reg(...)` in the compiled code + def needs_init(name): return name in combined and not re.search(rf'^\s*{name}\s*=\s*Reg\(', code, re.MULTILINE) + special_regs = [('D1', 'Reg(0)'), ('SIMM16', 'Reg(literal)'), ('SIMM32', 'Reg(literal)'), + ('SRC0', 'Reg(src0_idx)'), ('VDST', 'Reg(vdst_idx)')] + if needs_init('tmp'): special_regs.insert(0, ('tmp', 'Reg(0)')) + if needs_init('saveexec'): special_regs.insert(0, ('saveexec', 'Reg(EXEC._val)')) + used = {name for name, _ in special_regs if name in combined} + + # Detect which registers are modified (not just read) - look for assignments + modifies_d0 = is_div_scale or bool(re.search(r'\bD0\b[.\[]', combined)) + modifies_exec = is_cmpx or bool(re.search(r'EXEC\.(u32|u64|b32|b64)\s*=', combined)) + modifies_vcc = has_sdst or bool(re.search(r'VCC\.(u32|u64|b32|b64)\s*=|VCC\.u64\[laneId\]\s*=', combined)) + modifies_scc = bool(re.search(r'\bSCC\s*=', combined)) + modifies_pc = bool(re.search(r'\bPC\s*=', combined)) + + # Build init code for special registers + init_lines = [] + if is_div_scale: init_lines.append(" D0 = Reg(S0._val)") + for name, init in special_regs: + if name in used: init_lines.append(f" {name} = {init}") + if 'EXEC_LO' in code: init_lines.append(" EXEC_LO = SliceProxy(EXEC, 31, 0)") + if 'EXEC_HI' in code: init_lines.append(" EXEC_HI = SliceProxy(EXEC, 63, 32)") + if 'VCCZ' in code and not re.search(r'^\s*VCCZ\s*=', code, re.MULTILINE): init_lines.append(" VCCZ = Reg(1 if VCC._val == 0 else 0)") + if 'EXECZ' in code and not re.search(r'^\s*EXECZ\s*=', code, re.MULTILINE): init_lines.append(" EXECZ = Reg(1 if EXEC._val == 0 else 0)") + code_lines = [line for line in code.split('\n') if line.strip()] + if init_lines: + lines.extend(init_lines) + if code_lines: lines.append(" # --- compiled pseudocode ---") + for line in code_lines: + lines.append(f" {line}") + + # Build result dict - only include registers that are modified + result_items = [] + if modifies_d0: result_items.append("'D0': D0") + if modifies_scc: result_items.append("'SCC': SCC") + if modifies_vcc: result_items.append("'VCC': VCC") + if modifies_exec: result_items.append("'EXEC': EXEC") + if has_d1: result_items.append("'D1': D1") + if modifies_pc: result_items.append("'PC': PC") + lines.append(f" return {{{', '.join(result_items)}}}\n") return fn_name, '\n'.join(lines) # ═══════════════════════════════════════════════════════════════════════════════ diff --git a/extra/assembly/amd/test/test_pcode.py b/extra/assembly/amd/test/test_pcode.py index 9fdc630c00..be1274a863 100644 --- a/extra/assembly/amd/test/test_pcode.py +++ b/extra/assembly/amd/test/test_pcode.py @@ -229,17 +229,18 @@ class TestPseudocodeRegressions(unittest.TestCase): """Regression tests for pseudocode instruction emulation bugs.""" def test_v_div_scale_f32_vcc_always_returned(self): - """V_DIV_SCALE_F32 must always return vcc_lane, even when VCC=0 (no scaling needed). - Bug: when VCC._val == vcc (both 0), vcc_lane wasn't returned, so VCC bits weren't written. + """V_DIV_SCALE_F32 must always return VCC, even when VCC=0 (no scaling needed). + Bug: when VCC._val == vcc (both 0), VCC wasn't returned, so VCC bits weren't written. This caused division to produce wrong results for multiple lanes.""" # Normal case: 1.0 / 3.0, no scaling needed, VCC should be 0 - s0 = 0x3f800000 # 1.0 - s1 = 0x40400000 # 3.0 - s2 = 0x3f800000 # 1.0 (numerator) - result = _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - # Must always have vcc_lane in result - self.assertIn('vcc_lane', result, "V_DIV_SCALE_F32 must always return vcc_lane") - self.assertEqual(result['vcc_lane'], 0, "vcc_lane should be 0 when no scaling needed") + S0 = Reg(0x3f800000) # 1.0 + S1 = Reg(0x40400000) # 3.0 + S2 = Reg(0x3f800000) # 1.0 (numerator) + D0, SCC, VCC, EXEC = Reg(0), Reg(0), Reg(0), Reg(0xffffffff) + result = _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + # Must always have VCC in result + self.assertIn('VCC', result, "V_DIV_SCALE_F32 must always return VCC") + self.assertEqual(result['VCC']._val & 1, 0, "VCC lane 0 should be 0 when no scaling needed") def test_v_cmp_class_f32_detects_quiet_nan(self): """V_CMP_CLASS_F32 must correctly identify quiet NaN vs signaling NaN. @@ -248,18 +249,22 @@ class TestPseudocodeRegressions(unittest.TestCase): signal_nan = 0x7f800001 # signaling NaN: exponent=255, bit22=0 # Test quiet NaN detection (bit 1 in mask) s1_quiet = 0b0000000010 # bit 1 = quiet NaN - result = _VOPCOp_V_CMP_CLASS_F32(quiet_nan, s1_quiet, 0, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - self.assertEqual(result['vcc_lane'], 1, "Should detect quiet NaN with quiet NaN mask") + S0, S1, S2, D0, SCC, VCC, EXEC = Reg(quiet_nan), Reg(s1_quiet), Reg(0), Reg(0), Reg(0), Reg(0), Reg(0xffffffff) + result = _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + self.assertEqual(result['D0']._val & 1, 1, "Should detect quiet NaN with quiet NaN mask") # Test signaling NaN detection (bit 0 in mask) s1_signal = 0b0000000001 # bit 0 = signaling NaN - result = _VOPCOp_V_CMP_CLASS_F32(signal_nan, s1_signal, 0, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - self.assertEqual(result['vcc_lane'], 1, "Should detect signaling NaN with signaling NaN mask") + S0, S1 = Reg(signal_nan), Reg(s1_signal) + result = _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + self.assertEqual(result['D0']._val & 1, 1, "Should detect signaling NaN with signaling NaN mask") # Test that quiet NaN doesn't match signaling NaN mask - result = _VOPCOp_V_CMP_CLASS_F32(quiet_nan, s1_signal, 0, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - self.assertEqual(result['vcc_lane'], 0, "Quiet NaN should not match signaling NaN mask") + S0, S1 = Reg(quiet_nan), Reg(s1_signal) + result = _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + self.assertEqual(result['D0']._val & 1, 0, "Quiet NaN should not match signaling NaN mask") # Test that signaling NaN doesn't match quiet NaN mask - result = _VOPCOp_V_CMP_CLASS_F32(signal_nan, s1_quiet, 0, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - self.assertEqual(result['vcc_lane'], 0, "Signaling NaN should not match quiet NaN mask") + S0, S1 = Reg(signal_nan), Reg(s1_quiet) + result = _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + self.assertEqual(result['D0']._val & 1, 0, "Signaling NaN should not match quiet NaN mask") def test_isnan_with_typed_view(self): """_isnan must work with TypedView objects, not just Python floats.