From 9c89be5235eabc403d25aa95aa72a64e41794682 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 09:25:40 -0500
Subject: [PATCH] assembly/amd: fix v_perm_b32 + PC fixes (#13897)

* assembly/amd: fix v_perm_b32

* add pc support
---
 extra/assembly/amd/autogen/cdna/gen_pcode.py  | 3484 ++++++++++++-----
 extra/assembly/amd/autogen/rdna3/gen_pcode.py | 2852 +++++++++-----
 extra/assembly/amd/autogen/rdna4/gen_pcode.py | 2660 ++++++++-----
 extra/assembly/amd/emu.py                     |   69 +-
 extra/assembly/amd/pcode.py                   |   45 +-
 extra/assembly/amd/test/test_emu.py           |   24 +
 6 files changed, 6134 insertions(+), 3000 deletions(-)

diff --git a/extra/assembly/amd/autogen/cdna/gen_pcode.py b/extra/assembly/amd/autogen/cdna/gen_pcode.py
index 53a39ddfce..cb2f3e8f06 100644
--- a/extra/assembly/amd/autogen/cdna/gen_pcode.py
+++ b/extra/assembly/amd/autogen/cdna/gen_pcode.py
@@ -5,7 +5,7 @@
 from extra.assembly.amd.autogen.cdna import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3POp, VOPCOp, VOP3AOp, VOP3BOp
 from extra.assembly.amd.pcode import *
 
-def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.b32 = S0.b32
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -15,7 +15,7 @@ def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.b64 = S0.b64
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -26,7 +26,7 @@ def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if SCC then
   # D0.b32 = S0.b32
   # endif
@@ -40,7 +40,7 @@ def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if SCC then
   # D0.b64 = S0.b64
   # endif
@@ -55,7 +55,7 @@ def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ~S0.u32;
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -68,7 +68,7 @@ def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = ~S0.u64;
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -82,7 +82,7 @@ def _SOP1Op_S_NOT_B64(s0, s1,
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # declare i : 6'U; # for i in 6'0U : 6'31U do @@ -104,7 +104,7 @@ def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0ULL; # declare i : 6'U; # for i in 6'0U : 6'63U do @@ -127,7 +127,7 @@ def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -137,7 +137,7 @@ def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[63 : 0] = S0.u64[0 : 63] S0 = Reg(s0) D0 = Reg(d0) @@ -148,7 +148,7 @@ def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'0U ? 1 : 0 @@ -169,7 +169,7 @@ def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'0U ? 1 : 0 @@ -191,7 +191,7 @@ def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'1U ? 1 : 0 @@ -212,7 +212,7 @@ def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'1U ? 
1 : 0 @@ -234,7 +234,7 @@ def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no zeros are found # for i in 0 : 31 do @@ -257,7 +257,7 @@ def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no zeros are found # for i in 0 : 63 do @@ -280,7 +280,7 @@ def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -303,7 +303,7 @@ def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -326,7 +326,7 @@ def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -349,7 +349,7 @@ def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -372,7 +372,7 @@ def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -395,7 +395,7 @@ def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = -1;
   # // Set if all bits are the same
   # for i in 1 : 63 do
@@ -418,7 +418,7 @@ def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i8))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -428,7 +428,7 @@ def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i16))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -438,7 +438,7 @@ def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32[S0.u32[4 : 0]] = 1'0U
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -448,7 +448,7 @@ def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[S0.u32[5 : 0]] = 1'0U
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -459,7 +459,7 @@ def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32[S0.u32[4 : 0]] = 1'1U
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -469,7 +469,7 @@ def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[S0.u32[5 : 0]] = 1'1U
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -480,7 +480,62 @@ def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0.i64 = PC + 4LL
+  D0 = Reg(d0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  D0.i64 = PC + 4
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
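The PC handlers added in this hunk (S_GETPC_B64 above, S_SETPC_B64/S_SWAPPC_B64/S_RFE_B64 below) share one convention: the updated program counter comes back as result['new_pc'], an absolute byte address already reinterpreted as a signed Python int. A minimal sketch of how a dispatch loop might consume that field follows; the program table and run() driver are hypothetical names for illustration, not the emu.py API, and the jump-vs-fall-through policy is a simplification (S_GETPC_B64 also reports new_pc without being a jump).

# Illustrative sketch only -- 'program', 'run', and the dispatch policy are
# assumptions for this example, not the actual emu.py implementation.
def to_signed64(x):
  # Reinterpret an unsigned 64-bit value as two's complement, mirroring the
  # `_pc = PC._val if PC._val < 0x8000000000000000 else ...` lines above.
  return x if x < 0x8000000000000000 else x - 0x10000000000000000

assert to_signed64(4) == 4
assert to_signed64(0xFFFFFFFFFFFFFFFC) == -4  # values >= 2**63 wrap negative

def run(program, pc=0):
  # program: {absolute byte address: (handler, args, size_in_bytes)}
  while pc in program:
    handler, args, size = program[pc]
    result = handler(*args, pc=pc)  # every handler now accepts pc=
    # naive policy: jump when the handler reports a changed PC, else fall
    # through to the next instruction; the real emulator decides per-opcode
    new_pc = result.get('new_pc', pc)
    pc = new_pc if new_pc != pc else pc + size
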
+def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = S0.i64
+  S0 = Reg(s0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(S0.i64)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # jump_addr = S0.i64;
+  # D0.i64 = PC + 4LL;
+  # PC = jump_addr.i64
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  jump_addr = S0.i64
+  D0.i64 = PC + 4
+  PC = Reg(jump_addr.i64)
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = S0.i64
+  S0 = Reg(s0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(S0.i64)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u64;
@@ -503,7 +558,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set
   # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination
   # saveexec = EXEC.u64;
@@ -526,7 +581,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u64;
@@ -549,7 +604,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the negation of the EXEC
mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -572,7 +627,7 @@ def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result['d0_64'] = True return result -def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -595,7 +650,7 @@ def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -618,7 +673,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -641,7 +696,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -664,7 +719,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # for i in 0 : 7 do # tmp[i] = S0.u32[i * 4 +: 4] != 0U @@ -685,7 +740,7 @@ def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): # tmp = 0ULL; # for i in 0 : 15 do # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL @@ -707,7 +762,7 @@ def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -720,7 +775,7 @@ def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # M0[7 : 0] = S0.u32[7 : 0].b8 S0 = Reg(s0) # --- compiled pseudocode --- @@ -729,7 +784,7 @@ def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': d0, 'scc': scc & 1} return result -def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -752,7 +807,7 @@ def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result['d0_64'] = True return result -def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -775,7 +830,7 @@ def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
This instruction is @@ -796,7 +851,7 @@ def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -817,7 +872,7 @@ def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32; # for i in 0 : 31 do # D0.u64[i * 2] = tmp[i]; @@ -865,6 +920,10 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_BITSET0_B64: _SOP1Op_S_BITSET0_B64, SOP1Op.S_BITSET1_B32: _SOP1Op_S_BITSET1_B32, SOP1Op.S_BITSET1_B64: _SOP1Op_S_BITSET1_B64, + SOP1Op.S_GETPC_B64: _SOP1Op_S_GETPC_B64, + SOP1Op.S_SETPC_B64: _SOP1Op_S_SETPC_B64, + SOP1Op.S_SWAPPC_B64: _SOP1Op_S_SWAPPC_B64, + SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64, SOP1Op.S_AND_SAVEEXEC_B64: _SOP1Op_S_AND_SAVEEXEC_B64, SOP1Op.S_OR_SAVEEXEC_B64: _SOP1Op_S_OR_SAVEEXEC_B64, SOP1Op.S_XOR_SAVEEXEC_B64: _SOP1Op_S_XOR_SAVEEXEC_B64, @@ -884,7 +943,7 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_BITREPLICATE_B64_B32: _SOP1Op_S_BITREPLICATE_B64_B32, } -def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -901,7 +960,7 @@ def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # SCC = S1.u32 > S0.u32 ? 
1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -918,7 +977,7 @@ def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 + S1.i32; # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -935,7 +994,7 @@ def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 - S1.i32; # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -952,7 +1011,7 @@ def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -969,7 +1028,7 @@ def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - SCC.u32; # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -986,7 +1045,7 @@ def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 S0 = Reg(s0) @@ -1000,7 +1059,7 @@ def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) @@ -1014,7 +1073,7 @@ def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32; # D0.i32 = SCC ? 
S0.i32 : S1.i32 S0 = Reg(s0) @@ -1028,7 +1087,7 @@ def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) @@ -1042,7 +1101,7 @@ def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1054,7 +1113,7 @@ def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = SCC ? S0.u64 : S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -1067,7 +1126,7 @@ def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1081,7 +1140,7 @@ def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1096,7 +1155,7 @@ def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1110,7 +1169,7 @@ def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1125,7 +1184,7 @@ def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result['d0_64'] = True return result -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1139,7 +1198,7 @@ def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1154,7 +1213,7 @@ def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1168,7 +1227,7 @@ def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1183,7 +1242,7 @@ def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1197,7 +1256,7 @@ def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1212,7 +1271,7 @@ def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1226,7 +1285,7 @@ def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1241,7 +1300,7 @@ def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B32(s0, s1, s2, 
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1255,7 +1314,7 @@ def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1270,7 +1329,7 @@ def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1284,7 +1343,7 @@ def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1299,7 +1358,7 @@ def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1313,7 +1372,7 @@ def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 << S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1328,7 +1387,7 @@ def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 >> S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1342,7 +1401,7 @@ def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 >> S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1357,7 +1416,7 @@ def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -1371,7 +1430,7 @@ def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); # SCC = D0.i64 != 0LL S0 = Reg(s0) @@ -1386,7 +1445,7 @@ def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -1397,7 +1456,7 @@ def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -1409,7 +1468,7 @@ def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 * S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1420,7 +1479,7 @@ def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1434,7 +1493,7 @@ def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); # SCC = D0.i32 != 0 @@ -1451,7 +1510,7 @@ def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ((S0.u64 >> S1[5 : 
0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1466,7 +1525,7 @@ def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); # SCC = D0.i64 != 0LL @@ -1484,7 +1543,7 @@ def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32; # if D0.i32 < 0 then # D0.i32 = -D0.i32 @@ -1503,7 +1562,7 @@ def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -1514,7 +1573,7 @@ def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -1525,7 +1584,7 @@ def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1542,7 +1601,7 @@ def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1559,7 +1618,7 @@ def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 
1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1576,7 +1635,7 @@ def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1593,7 +1652,7 @@ def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -1604,7 +1663,7 @@ def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -1615,7 +1674,7 @@ def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -1680,7 +1739,7 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 == S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1691,7 +1750,7 @@ def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1702,7 +1761,7 @@ def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1713,7 +1772,7 @@ def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1724,7 +1783,7 @@ def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1735,7 +1794,7 @@ def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1746,7 +1805,7 @@ def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1757,7 +1816,7 @@ def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1768,7 +1827,7 @@ def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1779,7 +1838,7 @@ def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1790,7 +1849,7 @@ def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1801,7 +1860,7 @@ def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1812,7 +1871,7 @@ def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -1823,7 +1882,7 @@ def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -1834,7 +1893,7 @@ def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -1845,7 +1904,7 @@ def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -1856,7 +1915,7 @@ def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VSKIP = S0.u32[S1.u32[4 : 0]] S0 = Reg(s0) S1 = Reg(s1) @@ -1866,7 +1925,7 @@ def _SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': d0, 'scc': scc & 1} return result -def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # specified in the SRC0 operand. The raw bits of the SRC1 field are read and used to set the enable bits. S1[0] = # VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and S1[3] = VDST_REL. 
# M0[7 : 0] = S0.u32[7 : 0].b8; @@ -1883,7 +1942,7 @@ def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 == S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -1894,7 +1953,7 @@ def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -1928,7 +1987,7 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_LG_U64: _SOPCOp_S_CMP_LG_U64, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -1938,7 +1997,7 @@ def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.i32 = 32'I(signext(S0.i16)) # endif @@ -1952,7 +2011,7 @@ def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 == 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -1963,7 +2022,7 @@ def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 != 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -1974,7 +2033,7 @@ def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 > 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -1985,7 +2044,7 @@ def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 
S0.i32 >= 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -1996,7 +2055,7 @@ def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -2007,7 +2066,7 @@ def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <= 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -2018,7 +2077,7 @@ def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2029,7 +2088,7 @@ def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 != 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2040,7 +2099,7 @@ def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2051,7 +2110,7 @@ def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2062,7 +2121,7 @@ def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2073,7 +2132,7 @@ def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # SCC = S0.u32 <= 32'U(S1.u16)
  S0 = Reg(s0)
  S1 = Reg(s1)
@@ -2084,7 +2143,7 @@ def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': d0, 'scc': SCC._val & 1}
  return result

-def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # tmp = D0.i32;
  # D0.i32 = D0.i32 + 32'I(signext(S0.i16));
  # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31]));
@@ -2100,7 +2159,7 @@ def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': SCC._val & 1}
  return result

-def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.i32 = D0.i32 * 32'I(signext(S0.i16))
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -2110,6 +2169,22 @@ def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

+def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0.i64 = PC + 4LL;
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  D0 = Reg(d0)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  D0.i64 = PC + 4
+  PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
 SOPKOp_FUNCTIONS = {
  SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32,
  SOPKOp.S_CMOVK_I32: _SOPKOp_S_CMOVK_I32,
@@ -2127,9 +2202,10 @@ SOPKOp_FUNCTIONS = {
  SOPKOp.S_CMPK_LE_U32: _SOPKOp_S_CMPK_LE_U32,
  SOPKOp.S_ADDK_I32: _SOPKOp_S_ADDK_I32,
  SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32,
+ SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64,
 }

-def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # for i in 0U : SIMM16.u16[3 : 0].u32 do
  # endfor
  SIMM16 = Reg(literal)
@@ -2140,16 +2216,238 @@ def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _var
  result = {'d0': d0, 'scc': scc & 1}
  return result

-def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL;
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if SCC == 1'0U then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  SCC = Reg(scc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
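# All of the SOPP branch handlers added here share one target computation:
# sign-extend the 16-bit immediate, scale it to bytes (one SIMM16 unit is one
# dword, i.e. 4 bytes), and add it to the address of the next instruction
# (PC + 4); S_CALL_B64 above additionally returns that PC + 4 in D0 as the
# return address. A standalone sketch of the arithmetic with plain ints
# (illustrative only, not part of the generated file):
def _branch_target(pc: int, simm16: int) -> int:
  if simm16 & 0x8000: simm16 -= 0x10000  # signext(SIMM16.i16)
  return pc + 4 + simm16 * 4             # dword offset relative to the next instruction
# sanity check: a zero offset falls through, _branch_target(pc, 0) == pc + 4;
# the 'new_pc' lines in each handler then report this absolute byte address
# after converting the 64-bit Reg value back to a signed Python int.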
+  # --- compiled pseudocode ---
+  if SCC == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': SCC._val & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if SCC == 1'1U then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  SCC = Reg(scc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if SCC == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': SCC._val & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # If VCCZ is 1 then jump to a constant offset relative to the current PC.
+  # if VCCZ.u1 == 1'1U then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  VCC = Reg(vcc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if VCCZ.u1 == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # If VCCZ is 0 then jump to a constant offset relative to the current PC.
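# VCCZ is the hardware "VCC is all zeroes" status bit. The emulator carries no
# separate flag for it, so these handlers rederive it inline from the mask:
# VCCZ = Reg(1 if VCC._val == 0 else 0), and EXECZ likewise from exec_mask.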
+  # if VCCZ.u1 == 1'0U then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  VCC = Reg(vcc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if VCCZ.u1 == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if EXECZ.u1 == 1'1U then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  EXEC = Reg(exec_mask)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  EXECZ = Reg(1 if EXEC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if EXECZ.u1 == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if EXECZ.u1 == 1'0U then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  EXEC = Reg(exec_mask)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  EXECZ = Reg(1 if EXEC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if EXECZ.u1 == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # // PC passed into trap handler points to S_TRAP itself,
+  # PC = TBA.i64;
  # // trap base address
+  PC = Reg(pc)
  # --- compiled pseudocode ---
  # --- end pseudocode ---
  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
  return result

-def _SOPPOp_S_SET_GPR_IDX_MODE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_CBRANCH_CDBGSYS(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if WAVE_STATUS.COND_DBG_SYS.u32 != 0U then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if WAVE_STATUS.COND_DBG_SYS.u32 != 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_CDBGUSER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if WAVE_STATUS.COND_DBG_USER.u32 != 0U then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if WAVE_STATUS.COND_DBG_USER.u32 != 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if (WAVE_STATUS.COND_DBG_SYS || WAVE_STATUS.COND_DBG_USER) then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER):
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if (WAVE_STATUS.COND_DBG_SYS && WAVE_STATUS.COND_DBG_USER) then
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  # PC = PC + 4LL
+  # endif
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER):
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_SET_GPR_IDX_MODE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # Get Doorbell ID 10 - Returns doorbell into EXEC, with the doorbell physical address in bits
  EXEC = Reg(exec_mask)
  # --- compiled pseudocode ---
@@ -2161,11 +2459,22 @@ def _SOPPOp_S_SET_GPR_IDX_MODE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
 SOPPOp_FUNCTIONS = {
  SOPPOp.S_NOP: _SOPPOp_S_NOP,
+ SOPPOp.S_BRANCH: _SOPPOp_S_BRANCH,
+ SOPPOp.S_CBRANCH_SCC0: _SOPPOp_S_CBRANCH_SCC0,
+ SOPPOp.S_CBRANCH_SCC1: _SOPPOp_S_CBRANCH_SCC1,
+ SOPPOp.S_CBRANCH_VCCZ: _SOPPOp_S_CBRANCH_VCCZ,
+ SOPPOp.S_CBRANCH_VCCNZ: _SOPPOp_S_CBRANCH_VCCNZ,
+ SOPPOp.S_CBRANCH_EXECZ: _SOPPOp_S_CBRANCH_EXECZ,
+ SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ,
  SOPPOp.S_TRAP: _SOPPOp_S_TRAP,
+ SOPPOp.S_CBRANCH_CDBGSYS: _SOPPOp_S_CBRANCH_CDBGSYS,
+ SOPPOp.S_CBRANCH_CDBGUSER: _SOPPOp_S_CBRANCH_CDBGUSER,
+ SOPPOp.S_CBRANCH_CDBGSYS_OR_USER: _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER,
+ SOPPOp.S_CBRANCH_CDBGSYS_AND_USER: _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER,
  SOPPOp.S_SET_GPR_IDX_MODE: _SOPPOp_S_SET_GPR_IDX_MODE,
 }

-def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.b32 = S0.b32
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -2175,7 +2484,7 @@ def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def
_VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'I; # if EXEC == 0x0LL then # lane = 0; @@ -2199,7 +2508,7 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2209,7 +2518,7 @@ def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -2220,7 +2529,7 @@ def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -2230,7 +2539,7 @@ def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2240,7 +2549,7 @@ def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2250,7 +2559,7 @@ def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2260,7 +2569,7 @@ def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2270,7 +2579,7 @@ def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, 
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2280,7 +2589,7 @@ def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -2290,7 +2599,7 @@ def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -2300,7 +2609,7 @@ def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2310,7 +2619,7 @@ def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2321,7 +2630,7 @@ def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2331,7 +2640,7 @@ def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2341,7 +2650,7 @@ def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2351,7 +2660,7 @@ def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2361,7 +2670,7 @@ def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2371,7 +2680,7 @@ def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2382,7 +2691,7 @@ def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2393,7 +2702,7 @@ def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -2409,7 +2718,7 @@ def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -2425,7 +2734,7 @@ def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -2441,7 +2750,7 @@ def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 
S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2451,7 +2760,7 @@ def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2461,7 +2770,7 @@ def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -2476,7 +2785,7 @@ def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -2491,7 +2800,7 @@ def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -2506,7 +2815,7 @@ def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2516,7 +2825,7 @@ def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2526,7 +2835,7 @@ def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -2536,7 +2845,7 @@ def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -2547,7 +2856,7 @@ def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2557,7 +2866,7 @@ def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -2568,7 +2877,7 @@ def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2579,7 +2888,7 @@ def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2589,7 +2898,7 @@ def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2600,7 +2909,7 @@ def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -2610,7 +2919,7 @@ def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -2620,7 +2929,7 @@ def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -2630,7 +2939,7 @@ def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -2640,7 +2949,7 @@ def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -2660,7 +2969,7 @@ def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -2680,7 +2989,7 @@ def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -2700,7 +3009,7 @@ def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -2717,7 +3026,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -2735,7 +3044,7 @@ def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2746,7 +3055,7 @@ def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR 
result['d0_64'] = True return result -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -2763,7 +3072,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -2780,7 +3089,7 @@ def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b64 = S0.b64 S0 = Reg(s0) D0 = Reg(d0) @@ -2791,7 +3100,7 @@ def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -2801,7 +3110,7 @@ def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -2811,7 +3120,7 @@ def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2821,7 +3130,7 @@ def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2831,7 +3140,7 @@ def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 
S0 = Reg(s0) D0 = Reg(d0) @@ -2841,7 +3150,7 @@ def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2851,7 +3160,7 @@ def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2861,7 +3170,7 @@ def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2871,7 +3180,7 @@ def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2881,7 +3190,7 @@ def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -2898,7 +3207,7 @@ def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -2915,7 +3224,7 @@ def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -2930,7 +3239,7 @@ def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -2945,7 +3254,7 @@ def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2955,7 +3264,7 @@ def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -2970,7 +3279,7 @@ def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + -floor(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2980,7 +3289,7 @@ def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -2990,7 +3299,7 @@ def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3000,7 +3309,7 @@ def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_snorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -3010,7 +3319,7 @@ def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_unorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -3020,11 +3329,7 @@ def 
_VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
-  # if n <= 16'0 then
-  # elsif n >= 16'255 then
-  # else
-  # endif);
+def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # tmp = 16'0;
  # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16);
  # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16);
@@ -3033,12 +3338,6 @@ def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
  D0 = Reg(d0)
  tmp = Reg(0)
  # --- compiled pseudocode ---
-  if n <= 0:
-    pass
-  elif n >= 255:
-    pass
-  else:
-    pass
  tmp = Reg(0)
  tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16)
  tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16)
@@ -3047,7 +3346,7 @@ def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # tmp = D0.b32;
  # D0.b32 = S0.b32;
  # S0.b32 = tmp
@@ -3062,7 +3361,7 @@ def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # if SDWA_SRC0_SEL == BYTE1.b3 then
  # D0.f32 = fp8_to_f32(S0[15 : 8].fp8)
  # elsif SDWA_SRC0_SEL == BYTE2.b3 then
@@ -3089,7 +3388,7 @@ def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # if SDWA_SRC0_SEL == BYTE1.b3 then
  # D0.f32 = bf8_to_f32(S0[15 : 8].bf8)
  # elsif SDWA_SRC0_SEL == BYTE2.b3 then
@@ -3116,7 +3415,7 @@ def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? S0[31 : 16] : S0[15 : 0];
  # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8);
  # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8)
@@ -3133,7 +3432,7 @@ def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ?
S0[31 : 16] : S0[15 : 0]; # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) @@ -3150,7 +3449,7 @@ def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # for pass in 0 : 1 do # for lane in 0 : 15 do # tmp = VGPR[pass * 32 + lane][SRC0.u32]; @@ -3166,7 +3465,7 @@ def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # for lane in 0 : 31 do # tmp = VGPR[lane][SRC0.u32]; # endfor @@ -3179,7 +3478,7 @@ def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 32'F({ S0.b16, 16'0U }) # V_CMPX_{COMPF}_F16 16-bit float compare. Also writes EXEC. 0x30 to 0x3F # V_CMPX_{COMPF}_F32 32-bit float compare. Also writes EXEC. 0x50 to 0x5F @@ -3283,7 +3582,7 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_F32_BF16: _VOP1Op_V_CVT_F32_BF16, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -3297,7 +3596,7 @@ def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3308,7 +3607,7 @@ def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3319,7 +3618,7 @@ def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3330,7 +3629,7 @@ def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, D0.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -3342,7 +3641,7 @@ def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3353,7 +3652,7 @@ def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -3364,7 +3663,7 @@ def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -3375,7 +3674,7 @@ def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -3386,7 +3685,7 @@ def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -3397,7 +3696,7 @@ def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then @@ -3435,7 +3734,7 @@ def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then @@ -3477,7 +3776,7 @@ def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -3488,7 +3787,7 @@ def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -3499,7 +3798,7 @@ def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -3510,7 +3809,7 @@ def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -3521,7 +3820,7 @@ def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3532,7 +3831,7 @@ def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3543,7 +3842,7 @@ def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3554,7 +3853,7 @@ def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3565,7 +3864,7 @@ def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3576,7 +3875,7 @@ def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3587,7 +3886,7 @@ def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -3605,7 +3904,7 @@ def _VOP2Op_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -3617,7 +3916,7 @@ def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -3629,7 +3928,7 @@ def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. @@ -3649,7 +3948,7 @@ def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -3669,7 +3968,7 @@ def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32; # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -3689,7 +3988,7 @@ def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. @@ -3709,7 +4008,7 @@ def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 
1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -3729,7 +4028,7 @@ def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -3749,7 +4048,7 @@ def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3760,7 +4059,7 @@ def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3771,7 +4070,7 @@ def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3782,7 +4081,7 @@ def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3793,7 +4092,7 @@ def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * S1.f16 + D0.f16; # if OPSEL.u4[3] then # D0 = { tmp.f16, D0[15 : 0] } @@ -3814,7 +4113,7 @@ def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * SIMM16.f16 + S1.f16; S0 = Reg(s0) S1 = Reg(s1) @@ -3826,7 +4125,7 @@ def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 
'scc': scc & 1} return result -def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * S1.f16 + SIMM16.f16; S0 = Reg(s0) S1 = Reg(s1) @@ -3838,7 +4137,7 @@ def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 + S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -3849,7 +4148,7 @@ def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 - S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -3860,7 +4159,7 @@ def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S1.u16 - S0.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -3871,7 +4170,7 @@ def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -3882,7 +4181,7 @@ def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3893,7 +4192,7 @@ def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3904,7 +4203,7 @@ def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3915,7 +4214,7 @@ def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then @@ -3957,7 +4256,7 @@ def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then @@ -3995,7 +4294,7 @@ def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -4006,7 +4305,7 @@ def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -4017,7 +4316,7 @@ def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -4028,7 +4327,7 @@ def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 < S1.i16 ? 
S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -4039,7 +4338,7 @@ def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -4050,7 +4349,7 @@ def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4061,7 +4360,7 @@ def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4072,7 +4371,7 @@ def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4083,7 +4382,7 @@ def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -4101,7 +4400,7 @@ def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); @@ -4119,7 +4418,7 @@ def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); @@ -4141,7 +4440,7 @@ def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); @@ -4171,7 +4470,7 @@ def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -4182,7 +4481,7 @@ def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) S0 = Reg(s0) @@ -4195,7 +4494,7 @@ def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4271,7 +4570,7 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_XNOR_B32: _VOP2Op_V_XNOR_B32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; @@ -4289,7 +4588,7 @@ def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -4305,7 +4604,7 @@ def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; @@ -4322,7 +4621,7 @@ def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; @@ -4339,7 +4638,7 @@ def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -4355,7 +4654,7 @@ def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -4371,7 +4670,7 @@ def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -4387,7 +4686,7 @@ def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; @@ -4404,7 +4703,7 @@ def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? 
S0[31 : 16].i16 : S1[31 : 16].i16; @@ -4421,7 +4720,7 @@ def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; @@ -4439,7 +4738,7 @@ def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; @@ -4456,7 +4755,7 @@ def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; @@ -4473,7 +4772,7 @@ def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; @@ -4490,7 +4789,7 @@ def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? 
S0[31 : 16].u16 : S1[31 : 16].u16; @@ -4507,7 +4806,7 @@ def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); @@ -4525,7 +4824,7 @@ def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; @@ -4542,7 +4841,7 @@ def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; @@ -4559,7 +4858,7 @@ def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -4576,7 +4875,7 @@ def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -4593,7 +4892,7 @@ def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 32'F(S0[15 : 0].bf16) * 32'F(S1[15 : 0].bf16); # tmp += 32'F(S0[31 : 16].bf16) * 32'F(S1[31 : 16].bf16); # tmp += S2.f32; @@ -4612,7 +4911,7 @@ def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = 16'F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); # tmp[15 : 0].f16 = 16'F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); # D0.b32 = tmp.b32 @@ -4629,7 +4928,7 @@ def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = 16'F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); # tmp[15 : 0].f16 = 16'F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); # D0.b32 = tmp.b32 @@ -4646,7 +4945,7 @@ def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -4665,7 +4964,7 @@ def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.i32; # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); @@ -4684,7 +4983,7 @@ def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16); # tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16); @@ -4703,7 +5002,7 @@ def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.i32; # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); @@ -4726,7 +5025,7 @@ def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, 
pc=0): # tmp = S2.u32; # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); @@ -4749,7 +5048,7 @@ def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.i32; # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); @@ -4780,7 +5079,7 @@ def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); @@ -4811,7 +5110,7 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 64'B; # tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32); # tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32); @@ -4830,7 +5129,7 @@ def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 64'B; # tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32; # tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32; @@ -4848,7 +5147,7 @@ def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 64'B; # tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32; # tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32; @@ -4866,7 +5165,7 @@ def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3POp_V_PK_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32]; # tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32]; # D0.u32[31 : 0] = tmp0.u32; @@ -4920,7 +5219,7 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MOV_B32: _VOP3POp_V_PK_MOV_B32, } -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -4957,6 +5256,7 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -4976,9 +5276,11 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -5035,7 +5337,7 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -5072,6 +5374,7 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -5091,9 +5394,11 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -5150,7 +5455,7 @@ def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
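The `new_pc` plumbing added in the V_CMP_CLASS_* hunks above reinterprets the 64-bit program counter as a signed two's-complement value before reporting it. A minimal standalone sketch of that decode, not part of the patch; the helper name `to_signed64` is invented here, while the generated code inlines the conditional:

def to_signed64(v: int) -> int:
    # Reinterpret a 64-bit unsigned value as signed two's complement,
    # mirroring `_pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000`.
    return v if v < 0x8000000000000000 else v - 0x10000000000000000

assert to_signed64(0x1000) == 0x1000             # small PCs pass through unchanged
assert to_signed64(0xFFFFFFFFFFFFFFF8) == -8     # wrapped values decode as negative byte offsets
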
@@ -5187,6 +5492,7 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   if isSignalNAN(F(S0.f16)):
     result = S1.u32[0]
@@ -5206,9 +5512,11 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
@@ -5265,13 +5573,14 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['d0_64'] = True
   return result

-def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -5279,9 +5588,11 @@ def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f16 < S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5290,6 +5601,7 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 < S1.f16
   # --- end pseudocode ---
@@ -5297,9 +5609,11 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f16 == S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5308,6 +5622,7 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 == S1.f16
   # --- end pseudocode ---
@@ -5315,9 +5630,11 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 <= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5325,6 +5642,7 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 <= S1.f16
   # --- end pseudocode ---
@@ -5332,9 +5650,11 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.f16 > S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5343,6 +5663,7 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 > S1.f16
   # --- end pseudocode ---
@@ -5350,9 +5671,11 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 <> S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5360,6 +5683,7 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 != S1.f16
   # --- end pseudocode ---
@@ -5367,9 +5691,11 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 >= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5377,6 +5703,7 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 >= S1.f16
   # --- end pseudocode ---
@@ -5384,9 +5711,11 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC
   # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
@@ -5395,6 +5724,7 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
   # --- end pseudocode ---
@@ -5402,9 +5732,11 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
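[Editor's note] The pair of added lines that recurs in every result hunk above decodes the 64-bit PC register as a two's-complement signed integer before reporting it as new_pc. A minimal standalone sketch of the idiom (the helper name is illustrative, not part of the patch):

def to_signed64(v: int) -> int:
    # values with bit 63 set represent negative signed byte addresses
    return v if v < 0x8000000000000000 else v - 0x10000000000000000

assert to_signed64(0x1000) == 0x1000          # small addresses pass through
assert to_signed64(0xFFFFFFFFFFFFFFFF) == -1  # high values decode as negative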
@@ -5413,6 +5745,7 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
   # --- end pseudocode ---
@@ -5420,9 +5753,11 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f16 >= S1.f16);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -5431,6 +5766,7 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 >= S1.f16)
   # --- end pseudocode ---
@@ -5438,9 +5774,11 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f16 <> S1.f16);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -5449,6 +5787,7 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 != S1.f16)
   # --- end pseudocode ---
@@ -5456,9 +5795,11 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = !(S0.f16 > S1.f16);
   # // With NAN inputs this is not the same operation as <=
@@ -5468,6 +5809,7 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 > S1.f16)
   # --- end pseudocode ---
@@ -5475,9 +5817,11 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f16 <= S1.f16);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -5486,6 +5830,7 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 <= S1.f16)
   # --- end pseudocode ---
@@ -5493,9 +5838,11 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f16 == S1.f16);
   # // With NAN inputs this is not the same operation as !=
@@ -5505,6 +5852,7 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 == S1.f16)
   # --- end pseudocode ---
@@ -5512,9 +5860,11 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f16 < S1.f16);
   # // With NAN inputs this is not the same operation as >=
@@ -5524,6 +5874,7 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 < S1.f16)
   # --- end pseudocode ---
@@ -5531,15 +5882,18 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 1
   # --- end pseudocode ---
@@ -5547,9 +5901,11 @@ def _VOPCOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
@@ -5557,6 +5913,7 @@ def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -5565,9 +5922,11 @@ def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16;
   # // D0 = VCC in VOPC encoding.
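[Editor's note] The generated "# // With NAN inputs this is not the same operation as ..." comments above can be checked concretely: not (a >= b) and a < b disagree exactly when an operand is NAN, which is why V_CMP_NGE exists as a separate opcode from V_CMP_LT. In plain Python:

import math
a, b = math.nan, 1.0
assert (a < b) is False        # every ordered comparison with NAN is False
assert (not (a >= b)) is True  # so NGE ("not greater-or-equal") is True here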
@@ -5577,6 +5936,7 @@ def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16
   # --- end pseudocode ---
@@ -5585,9 +5945,11 @@ def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5597,6 +5959,7 @@ def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16
   # --- end pseudocode ---
@@ -5605,9 +5968,11 @@ def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5616,6 +5981,7 @@ def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16
   # --- end pseudocode ---
@@ -5624,9 +5990,11 @@ def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5636,6 +6004,7 @@ def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16
   # --- end pseudocode ---
@@ -5644,9 +6013,11 @@ def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5655,6 +6026,7 @@ def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16
   # --- end pseudocode ---
@@ -5663,9 +6035,11 @@ def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5674,6 +6048,7 @@ def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16
   # --- end pseudocode ---
@@ -5682,9 +6057,11 @@ def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
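[Editor's note] In the V_CMPX_* hunks above, the compare bit is written to both the destination (VCC) and the EXEC mask, and the result dict reports both as exec_lane and vcc_lane. A sketch of the per-lane mask update, assuming 64-bit wave masks (set_lane_bit is hypothetical, not a helper from this repo):

def set_lane_bit(mask: int, lane: int, cond: bool) -> int:
    # set or clear a single lane's bit in a 64-bit mask
    bit = 1 << lane
    return (mask | bit) if cond else (mask & ~bit)

exec_mask = set_lane_bit(0, 3, True)  # lane 3 passes the compare
vcc = set_lane_bit(0, 3, True)
assert exec_mask == vcc == 0b1000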
@@ -5753,6 +6139,7 @@ def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16)
   # --- end pseudocode ---
@@ -5761,9 +6148,11 @@ def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16);
   # // With NAN inputs this is not the same operation as <=
   # // D0 = VCC in VOPC encoding.
@@ -5773,6 +6162,7 @@ def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16)
   # --- end pseudocode ---
@@ -5781,9 +6171,11 @@ def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -5793,6 +6185,7 @@ def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16)
   # --- end pseudocode ---
@@ -5801,9 +6194,11 @@ def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16);
   # // With NAN inputs this is not the same operation as !=
@@ -5814,6 +6209,7 @@ def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16)
   # --- end pseudocode ---
@@ -5822,9 +6218,11 @@ def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16);
   # // With NAN inputs this is not the same operation as >=
@@ -5835,6 +6233,7 @@ def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16)
   # --- end pseudocode ---
@@ -5843,9 +6242,11 @@ def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
@@ -5853,6 +6254,7 @@ def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 1
   # --- end pseudocode ---
@@ -5861,15 +6263,18 @@ def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -5877,9 +6282,11 @@ def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f32 < S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -5888,6 +6295,7 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 < S1.f32
   # --- end pseudocode ---
@@ -5895,9 +6303,11 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f32 == S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -5906,6 +6316,7 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 == S1.f32
   # --- end pseudocode ---
@@ -5913,9 +6324,11 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f32 <= S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5923,6 +6336,7 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 <= S1.f32
   # --- end pseudocode ---
@@ -5930,9 +6344,11 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.f32 > S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -5941,6 +6357,7 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 > S1.f32
   # --- end pseudocode ---
@@ -5948,9 +6365,11 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f32 <> S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5958,6 +6377,7 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 != S1.f32
   # --- end pseudocode ---
@@ -5965,9 +6385,11 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f32 >= S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5975,6 +6397,7 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 >= S1.f32
   # --- end pseudocode ---
@@ -5982,9 +6405,11 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC
   # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
@@ -5993,6 +6418,7 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
   # --- end pseudocode ---
@@ -6000,9 +6426,11 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
@@ -6011,6 +6439,7 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
   # --- end pseudocode ---
@@ -6018,9 +6447,11 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 >= S1.f32);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -6029,6 +6460,7 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 >= S1.f32)
   # --- end pseudocode ---
@@ -6036,9 +6468,11 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 <> S1.f32);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -6047,6 +6481,7 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 != S1.f32)
   # --- end pseudocode ---
@@ -6054,9 +6489,11 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = !(S0.f32 > S1.f32);
   # // With NAN inputs this is not the same operation as <=
@@ -6066,6 +6503,7 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 > S1.f32)
   # --- end pseudocode ---
@@ -6073,9 +6511,11 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 <= S1.f32);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
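[Editor's note] V_CMP_O ("orderable") and V_CMP_U ("unordered") above reduce to NAN tests on the two operands; each is the negation of the other. Equivalently, in plain Python:

import math
def cmp_o(a: float, b: float) -> bool:
    return not math.isnan(a) and not math.isnan(b)  # both operands ordered
def cmp_u(a: float, b: float) -> bool:
    return math.isnan(a) or math.isnan(b)           # at least one NAN
assert cmp_o(1.0, 2.0) and not cmp_o(math.nan, 2.0)
assert cmp_u(math.nan, 2.0) and not cmp_u(1.0, 2.0)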
@@ -6084,6 +6524,7 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 <= S1.f32)
   # --- end pseudocode ---
@@ -6091,9 +6532,11 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f32 == S1.f32);
   # // With NAN inputs this is not the same operation as !=
@@ -6103,6 +6546,7 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 == S1.f32)
   # --- end pseudocode ---
@@ -6110,9 +6554,11 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f32 < S1.f32);
   # // With NAN inputs this is not the same operation as >=
@@ -6122,6 +6568,7 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 < S1.f32)
   # --- end pseudocode ---
@@ -6129,15 +6576,18 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 1
   # --- end pseudocode ---
@@ -6145,9 +6595,11 @@ def _VOPCOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
@@ -6155,6 +6607,7 @@ def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -6163,9 +6616,11 @@ def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -6175,6 +6630,7 @@ def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32
   # --- end pseudocode ---
@@ -6183,9 +6639,11 @@ def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -6195,6 +6653,7 @@ def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32
   # --- end pseudocode ---
@@ -6203,9 +6662,11 @@ def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -6214,6 +6675,7 @@ def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32
   # --- end pseudocode ---
@@ -6222,9 +6684,11 @@ def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -6234,6 +6698,7 @@ def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32
   # --- end pseudocode ---
@@ -6242,9 +6707,11 @@ def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -6253,6 +6720,7 @@ def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32
   # --- end pseudocode ---
@@ -6261,9 +6729,11 @@ def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -6272,6 +6742,7 @@ def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32
   # --- end pseudocode ---
@@ -6280,9 +6751,11 @@ def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
@@ -6292,6 +6765,7 @@ def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
   # --- end pseudocode ---
@@ -6300,9 +6774,11 @@ def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -6311,6 +6787,7 @@ def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
   # --- end pseudocode ---
@@ -6319,9 +6796,11 @@ def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -6331,6 +6810,7 @@ def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32)
   # --- end pseudocode ---
@@ -6339,9 +6819,11 @@ def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -6351,6 +6833,7 @@ def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32)
   # --- end pseudocode ---
@@ -6359,9 +6842,11 @@ def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32);
   # // With NAN inputs this is not the same operation as <=
   # // D0 = VCC in VOPC encoding.
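[Editor's note] How an emulator consumes these result dicts is outside this diff; the sketch below is a hypothetical driver, not the actual extra/assembly/amd/emu.py logic, and only illustrates the contract implied by the keys above (vcc_lane, exec_lane, new_pc):

def apply_lane_result(state: dict, lane: int, result: dict) -> None:
    bit = 1 << lane
    if 'vcc_lane' in result:   # per-lane condition code bit
        state['vcc'] = (state['vcc'] | bit) if result['vcc_lane'] else (state['vcc'] & ~bit)
    if 'exec_lane' in result:  # V_CMPX_* also masks the lane in EXEC
        state['exec'] = (state['exec'] | bit) if result['exec_lane'] else (state['exec'] & ~bit)
    if 'new_pc' in result:     # absolute byte address reported by the op
        state['pc'] = result['new_pc']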
@@ -6371,6 +6856,7 @@ def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- @@ -6379,9 +6865,11 @@ def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -6391,6 +6879,7 @@ def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- @@ -6399,9 +6888,11 @@ def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -6412,6 +6903,7 @@ def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- @@ -6420,9 +6912,11 @@ def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. 
# EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -6433,6 +6927,7 @@ def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- @@ -6441,9 +6936,11 @@ def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -6451,6 +6948,7 @@ def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -6459,15 +6957,18 @@ def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -6475,9 +6976,11 @@ def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. 
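
Every handler now takes the current program counter and reports it back as 'new_pc' (an absolute byte address). Reg stores the value as unsigned 64-bit, so the branch at the end of each handler reinterprets it as two's-complement signed before returning it. A standalone equivalent of that idiom (to_signed64 is an illustrative name, not part of pcode.py):

def to_signed64(v: int) -> int:
  # v if v < 2**63 else v - 2**64, written as pure bit arithmetic
  v &= 0xFFFFFFFFFFFFFFFF
  return v - ((v & 0x8000000000000000) << 1)

assert to_signed64(0xFFFFFFFFFFFFFFFF) == -1
assert to_signed64(0x7FFFFFFFFFFFFFFF) == 2**63 - 1

For the compare opcodes in this file PC is never written, so 'new_pc' simply echoes the pc argument; the conversion only matters for the branch handlers that actually redirect it.
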
@@ -6486,6 +6989,7 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -6493,9 +6997,11 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -6504,6 +7010,7 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -6511,9 +7018,11 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6521,6 +7030,7 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -6528,9 +7038,11 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. 
@@ -6539,6 +7051,7 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -6546,9 +7059,11 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6556,6 +7071,7 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -6563,9 +7079,11 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6573,6 +7091,7 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -6580,9 +7099,11 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
@@ -6591,6 +7112,7 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -6598,9 +7120,11 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -6609,6 +7133,7 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -6616,9 +7141,11 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -6627,6 +7154,7 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -6634,9 +7162,11 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
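
V_CMP_O ("ordered") and V_CMP_U ("unordered") are exact complements: O is true only when both inputs are real numbers, U when at least one is NaN. Assuming the pcode isNAN helper agrees with math.isnan (the free functions below are for illustration only):

import math

def v_cmp_o_f64(a: float, b: float) -> int:
  return int(not math.isnan(a) and not math.isnan(b))  # both inputs ordered

def v_cmp_u_f64(a: float, b: float) -> int:
  return int(math.isnan(a) or math.isnan(b))           # at least one NaN

assert v_cmp_o_f64(1.0, 2.0) == 1
assert v_cmp_u_f64(1.0, float("nan")) == 1
assert all(v_cmp_o_f64(a, b) ^ v_cmp_u_f64(a, b) for a, b in [(1.0, 2.0), (float("nan"), 0.0)])
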
@@ -6645,6 +7175,7 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -6652,9 +7183,11 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -6664,6 +7197,7 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -6671,9 +7205,11 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -6682,6 +7218,7 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -6689,9 +7226,11 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -6701,6 +7240,7 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -6708,9 +7248,11 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -6720,6 +7262,7 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -6727,15 +7270,18 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -6743,9 +7289,11 @@ def _VOPCOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
@@ -6753,6 +7301,7 @@ def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -6761,9 +7310,11 @@ def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -6773,6 +7324,7 @@ def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -6781,9 +7333,11 @@ def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -6793,6 +7347,7 @@ def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -6801,9 +7356,11 @@ def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -6812,6 +7369,7 @@ def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -6820,9 +7378,11 @@ def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -6832,6 +7392,7 @@ def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -6840,9 +7401,11 @@ def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6851,6 +7414,7 @@ def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -6859,9 +7423,11 @@ def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -6870,6 +7436,7 @@ def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -6878,9 +7445,11 @@ def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -6890,6 +7459,7 @@ def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -6898,9 +7468,11 @@ def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6909,6 +7481,7 @@ def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -6917,9 +7490,11 @@ def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. 
@@ -6929,6 +7504,7 @@ def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -6937,9 +7513,11 @@ def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -6949,6 +7527,7 @@ def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -6957,9 +7536,11 @@ def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. @@ -6969,6 +7550,7 @@ def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -6977,9 +7559,11 @@ def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. 
@@ -6989,6 +7573,7 @@ def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -6997,9 +7582,11 @@ def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -7010,6 +7597,7 @@ def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -7018,9 +7606,11 @@ def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -7031,6 +7621,7 @@ def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -7039,9 +7630,11 @@ def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
@@ -7049,6 +7642,7 @@ def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7057,15 +7651,18 @@ def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7073,9 +7670,11 @@ def _VOPCOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -7084,6 +7683,7 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -7091,9 +7691,11 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
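
The CMPX variants differ from plain CMP only in also writing the per-lane EXEC bit, which is why their handlers return an 'exec_lane' key alongside 'vcc_lane'. A wave-level sketch of how those bits could be folded back into the 64-bit masks (fold_cmpx is illustrative, not emu.py's actual loop, and it ignores which lanes were active to begin with):

def fold_cmpx(lane_results: list, exec_mask: int, vcc: int):
  # merge each lane's reported bit into the corresponding mask position
  for lane, r in enumerate(lane_results):
    exec_mask = (exec_mask & ~(1 << lane)) | (r['exec_lane'] << lane)
    vcc = (vcc & ~(1 << lane)) | (r['vcc_lane'] << lane)
  return exec_mask, vcc
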
@@ -7102,6 +7704,7 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -7109,9 +7712,11 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7119,6 +7724,7 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -7126,9 +7732,11 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -7137,6 +7745,7 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -7144,9 +7753,11 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
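
The I16 and U16 compare families are generated separately because the same low 16 bits order differently once the sign bit matters. A small helper in the spirit of the Reg accessors (as_i16 is an illustrative name, assuming .i16 sign-extends bit 15):

def as_i16(v: int) -> int:
  v &= 0xFFFF
  return v - 0x10000 if v & 0x8000 else v  # sign-extend bit 15

assert as_i16(0xFFFF) == -1  # signed view: 0xFFFF compares below 1
assert 0xFFFF > 1            # unsigned view: the same bits compare above 1
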
@@ -7155,6 +7766,7 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -7162,9 +7774,11 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7172,6 +7786,7 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -7179,15 +7794,18 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7195,15 +7813,18 @@ def _VOPCOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7211,9 +7832,11 @@ def _VOPCOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -7222,6 +7845,7 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -7229,9 +7853,11 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. @@ -7240,6 +7866,7 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -7247,9 +7874,11 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -7257,6 +7886,7 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -7264,9 +7894,11 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -7275,6 +7907,7 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -7282,9 +7915,11 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. @@ -7293,6 +7928,7 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -7300,9 +7936,11 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -7310,6 +7948,7 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -7317,15 +7956,18 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7333,9 +7975,11 @@ def _VOPCOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -7343,6 +7987,7 @@ def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7351,9 +7996,11 @@ def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -7363,6 +8010,7 @@ def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -7371,9 +8019,11 @@ def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. @@ -7383,6 +8033,7 @@ def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -7391,9 +8042,11 @@ def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7402,6 +8055,7 @@ def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -7410,9 +8064,11 @@ def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -7422,6 +8078,7 @@ def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -7430,9 +8087,11 @@ def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. @@ -7442,6 +8101,7 @@ def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -7450,9 +8110,11 @@ def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7461,6 +8123,7 @@ def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -7469,9 +8132,11 @@ def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
@@ -7479,6 +8144,7 @@ def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7487,9 +8153,11 @@ def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -7497,6 +8165,7 @@ def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7505,9 +8174,11 @@ def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -7517,6 +8188,7 @@ def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -7525,9 +8197,11 @@ def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
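[editor's note] All V_CMPX_* variants follow one shape: the compare bit is written through both `EXEC.u64[laneId]` and `D0.u64[laneId]` (D0 is VCC in the plain VOPC encoding), then handed back per lane as `result['exec_lane']` and `result['vcc_lane']`. A hedged sketch of how a caller could fold those single-bit reports back into wave-wide 64-bit masks; the function name and mask arguments are illustrative, not emu.py's actual interface:

def fold_lane_bits(exec_mask: int, vcc_mask: int, lane: int, result: dict) -> tuple[int, int]:
  # clear the lane's bit, then OR in the reported value
  if 'exec_lane' in result:
    exec_mask = (exec_mask & ~(1 << lane)) | (result['exec_lane'] << lane)
  if 'vcc_lane' in result:
    vcc_mask = (vcc_mask & ~(1 << lane)) | (result['vcc_lane'] << lane)
  return exec_mask, vcc_mask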
@@ -7537,6 +8211,7 @@ def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -7545,9 +8220,11 @@ def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7556,6 +8233,7 @@ def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -7564,9 +8242,11 @@ def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -7576,6 +8256,7 @@ def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -7584,9 +8265,11 @@ def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -7596,6 +8279,7 @@ def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -7604,9 +8288,11 @@ def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7615,6 +8301,7 @@ def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -7623,9 +8310,11 @@ def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -7633,6 +8322,7 @@ def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7641,15 +8331,18 @@ def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7657,9 +8350,11 @@ def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -7668,6 +8363,7 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -7675,9 +8371,11 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. @@ -7686,6 +8384,7 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -7693,9 +8392,11 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. 
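[editor's note] The `.i16`/`.i32`/`.i64` accessors in these handlers compare signed views of the raw register bits, and the `<>` operator in the quoted ISA comments is compiled to Python's `!=` (visible in the NE handlers above). The signed view is ordinary sign extension; a minimal sketch, where `as_signed` is an illustrative helper and the real bit-views live on `Reg` in extra/assembly/amd/pcode.py:

def as_signed(v: int, bits: int) -> int:
  v &= (1 << bits) - 1                        # keep the low `bits` bits
  return v - (1 << bits) if v & (1 << (bits - 1)) else v

assert as_signed(0xFFFF, 16) == -1            # S0.i16 view of 0xFFFF
assert as_signed(0x80000000, 32) == -2**31    # S0.i32 view of the sign bit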
S0 = Reg(s0) @@ -7703,6 +8404,7 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -7710,9 +8412,11 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -7721,6 +8425,7 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -7728,9 +8433,11 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. @@ -7739,6 +8446,7 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -7746,9 +8454,11 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -7756,6 +8466,7 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -7763,15 +8474,18 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7779,15 +8493,18 @@ def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7795,9 +8512,11 @@ def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -7806,6 +8525,7 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -7813,9 +8533,11 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -7824,6 +8546,7 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -7831,9 +8554,11 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7841,6 +8566,7 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -7848,9 +8574,11 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -7859,6 +8587,7 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -7866,9 +8595,11 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -7877,6 +8608,7 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -7884,9 +8616,11 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7894,6 +8628,7 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -7901,15 +8636,18 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
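[editor's note] The plain V_CMP_* handlers (no X) differ from V_CMPX_* only in what they report: there is no `exec_lane` key, and `vcc_lane` is taken from D0's lane bit because D0 *is* VCC in the VOPC encoding; the `if VCC._val != vcc:` guard only fires for pseudocode that writes VCC explicitly. Note that `D0.u64[laneId] = x` is a single-bit store into a 64-bit mask, i.e. spelled out (illustrative):

# equivalent of the Reg bit-slice store D0.u64[lane] = x
d0_val = (d0_val & ~(1 << lane)) | ((x & 1) << lane)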
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7917,9 +8655,11 @@ def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -7927,6 +8667,7 @@ def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7935,9 +8676,11 @@ def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -7947,6 +8690,7 @@ def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -7955,9 +8699,11 @@ def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -7967,6 +8713,7 @@ def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -7975,9 +8722,11 @@ def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7986,6 +8735,7 @@ def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -7994,9 +8744,11 @@ def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -8006,6 +8758,7 @@ def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -8014,9 +8767,11 @@ def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -8026,6 +8781,7 @@ def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -8034,9 +8790,11 @@ def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8045,6 +8803,7 @@ def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -8053,9 +8812,11 @@ def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -8063,6 +8824,7 @@ def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -8071,9 +8833,11 @@ def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
@@ -8081,6 +8845,7 @@ def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -8089,9 +8854,11 @@ def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. @@ -8101,6 +8868,7 @@ def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -8109,9 +8877,11 @@ def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -8121,6 +8891,7 @@ def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -8129,9 +8900,11 @@ def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -8140,6 +8913,7 @@ def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -8148,9 +8922,11 @@ def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. @@ -8160,6 +8936,7 @@ def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -8168,9 +8945,11 @@ def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -8180,6 +8959,7 @@ def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -8188,9 +8968,11 @@ def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -8199,6 +8981,7 @@ def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -8207,9 +8990,11 @@ def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -8217,6 +9002,7 @@ def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -8225,15 +9011,18 @@ def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -8241,9 +9030,11 @@ def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -8252,6 +9043,7 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -8259,9 +9051,11 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -8270,6 +9064,7 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -8277,9 +9072,11 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8287,6 +9084,7 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -8294,9 +9092,11 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. 
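[editor's note] A hedged usage sketch of one of these handlers, assuming only the signature shown above and the Reg bit-view semantics from pcode.py; the argument values are made up, and `VGPR`/`_vars` are unused by this op, so placeholders are fine:

res = _VOPCOp_V_CMP_LT_I64(
  s0=(-5) & 0xFFFFFFFFFFFFFFFF,  # S0.i64 = -5, passed as raw 64-bit bits
  s1=3, s2=0, d0=0, scc=0, vcc=0,
  lane=2, exec_mask=0xF, literal=0, VGPR=None, _vars={}, pc=0x100)
assert res['vcc_lane'] == 1      # -5 < 3, so lane 2's compare bit is set
assert res['d0_64'] is True      # D0 is a 64-bit mask register here
assert res['new_pc'] == 0x100    # compares never redirect the PC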
@@ -8305,6 +9105,7 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -8312,9 +9113,11 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -8323,6 +9126,7 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -8330,9 +9134,11 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8340,6 +9146,7 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -8347,15 +9154,18 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -8363,15 +9173,18 @@ def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -8379,9 +9192,11 @@ def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. @@ -8390,6 +9205,7 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -8397,9 +9213,11 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -8408,6 +9226,7 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u64 == S1.u64
   # --- end pseudocode ---
@@ -8415,9 +9234,11 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.u64 <= S1.u64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -8425,6 +9246,7 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u64 <= S1.u64
   # --- end pseudocode ---
@@ -8432,9 +9254,11 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.u64 > S1.u64;
   # // D0 = VCC in VOPC encoding.
@@ -8443,6 +9267,7 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u64 > S1.u64
   # --- end pseudocode ---
@@ -8450,9 +9275,11 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = S0.u64 <> S1.u64;
   # // D0 = VCC in VOPC encoding.
@@ -8461,6 +9288,7 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u64 != S1.u64
   # --- end pseudocode ---
@@ -8468,9 +9296,11 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.u64 >= S1.u64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -8478,6 +9308,7 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u64 >= S1.u64
   # --- end pseudocode ---
@@ -8485,15 +9316,18 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 1
   # --- end pseudocode ---
@@ -8501,9 +9335,11 @@ def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
@@ -8511,6 +9347,7 @@ def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -8519,9 +9356,11 @@ def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64;
   # // D0 = VCC in VOPC encoding.
@@ -8531,6 +9370,7 @@ def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64
   # --- end pseudocode ---
@@ -8539,9 +9379,11 @@ def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64;
   # // D0 = VCC in VOPC encoding.
@@ -8551,6 +9393,7 @@ def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64
   # --- end pseudocode ---
@@ -8559,9 +9402,11 @@ def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -8570,6 +9415,7 @@ def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64
   # --- end pseudocode ---
@@ -8578,9 +9424,11 @@ def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64;
   # // D0 = VCC in VOPC encoding.
@@ -8590,6 +9438,7 @@ def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64
   # --- end pseudocode ---
@@ -8598,9 +9447,11 @@ def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64;
   # // D0 = VCC in VOPC encoding.
@@ -8610,6 +9461,7 @@ def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64
   # --- end pseudocode ---
@@ -8618,9 +9470,11 @@ def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -8629,6 +9483,7 @@ def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64
   # --- end pseudocode ---
@@ -8637,9 +9492,11 @@ def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
@@ -8647,6 +9504,7 @@ def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 1
   # --- end pseudocode ---
@@ -8655,9 +9513,11 @@ def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
@@ -8665,6 +9525,7 @@ def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -8673,9 +9534,11 @@ def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64;
   # // D0 = VCC in VOPC encoding.
@@ -8685,6 +9548,7 @@ def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64
   # --- end pseudocode ---
@@ -8693,9 +9557,11 @@ def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64;
   # // D0 = VCC in VOPC encoding.
@@ -8705,6 +9571,7 @@ def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64
   # --- end pseudocode ---
@@ -8713,9 +9580,11 @@ def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -8724,6 +9593,7 @@ def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64
   # --- end pseudocode ---
@@ -8732,9 +9602,11 @@ def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64;
   # // D0 = VCC in VOPC encoding.
@@ -8744,6 +9616,7 @@ def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64
   # --- end pseudocode ---
@@ -8752,9 +9625,11 @@ def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64;
   # // D0 = VCC in VOPC encoding.
@@ -8764,6 +9639,7 @@ def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64
   # --- end pseudocode ---
@@ -8772,9 +9648,11 @@ def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -8783,6 +9661,7 @@ def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64
   # --- end pseudocode ---
@@ -8791,9 +9670,11 @@ def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
@@ -8811,6 +9692,7 @@ def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   EXEC = Reg(exec_mask)
   tmp = Reg(0)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 1
   addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32)
@@ -8828,6 +9710,8 @@ def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result['exec_lane'] = (EXEC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

 VOPCOp_FUNCTIONS = {
@@ -9031,7 +9915,7 @@ VOPCOp_FUNCTIONS = {
   VOPCOp.V_CMPX_T_U64: _VOPCOp_V_CMPX_T_U64,
 }

-def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
@@ -9068,6 +9952,7 @@ def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   if isSignalNAN(F(S0.f32)):
     result = S1.u32[0]
@@ -9086,9 +9971,11 @@ def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
@@ -9144,7 +10031,7 @@ def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result['d0_64'] = True
   return result

-def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
@@ -9181,6 +10068,7 @@ def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   if isSignalNAN(S0.f64):
     result = S1.u32[0]
@@ -9199,9 +10087,11 @@ def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
@@ -9257,7 +10147,7 @@ def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result['d0_64'] = True
   return result

-def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
@@ -9294,6 +10184,7 @@ def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   if isSignalNAN(F(S0.f16)):
     result = S1.u32[0]
@@ -9312,9 +10203,11 @@ def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
@@ -9370,22 +10263,25 @@ def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result['d0_64'] = True
   return result

-def _VOP3AOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 0
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f16 < S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -9394,15 +10290,18 @@ def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 < S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f16 == S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -9411,15 +10310,18 @@ def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 == S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 <= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -9427,15 +10329,18 @@ def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 <= S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.f16 > S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -9444,15 +10349,18 @@ def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 > S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 <> S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -9460,15 +10368,18 @@ def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 != S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 >= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -9476,15 +10387,18 @@ def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 >= S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC
   # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
@@ -9493,15 +10407,18 @@ def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
@@ -9510,15 +10427,18 @@ def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f16 >= S1.f16);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -9527,15 +10447,18 @@ def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 >= S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f16 <> S1.f16);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -9544,15 +10467,18 @@ def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 != S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = !(S0.f16 > S1.f16);
   # // With NAN inputs this is not the same operation as <=
@@ -9562,15 +10488,18 @@ def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 > S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f16 <= S1.f16);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -9579,15 +10508,18 @@ def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 <= S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f16 == S1.f16);
   # // With NAN inputs this is not the same operation as !=
@@ -9597,15 +10529,18 @@ def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 == S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f16 < S1.f16);
   # // With NAN inputs this is not the same operation as >=
@@ -9615,30 +10550,36 @@ def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 < S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 1
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
@@ -9646,6 +10587,7 @@ def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -9653,9 +10595,11 @@ def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -9665,6 +10609,7 @@ def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16
   # --- end pseudocode ---
@@ -9672,9 +10617,11 @@ def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -9684,6 +10631,7 @@ def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16
   # --- end pseudocode ---
@@ -9691,9 +10639,11 @@ def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -9702,6 +10652,7 @@ def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16
   # --- end pseudocode ---
@@ -9709,9 +10660,11 @@ def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -9721,6 +10674,7 @@ def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16
   # --- end pseudocode ---
@@ -9728,9 +10682,11 @@ def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -9739,6 +10695,7 @@ def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16
   # --- end pseudocode ---
@@ -9746,9 +10703,11 @@ def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -9757,6 +10716,7 @@ def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16
   # --- end pseudocode ---
@@ -9764,9 +10724,11 @@ def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
@@ -9776,6 +10738,7 @@ def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
   # --- end pseudocode ---
@@ -9783,9 +10746,11 @@ def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -9794,6 +10759,7 @@ def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
   # --- end pseudocode ---
@@ -9801,9 +10767,11 @@ def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 >= S1.f16);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -9813,6 +10781,7 @@ def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16)
   # --- end pseudocode ---
@@ -9820,9 +10789,11 @@ def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <> S1.f16);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -9832,6 +10803,7 @@ def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16)
   # --- end pseudocode ---
@@ -9839,9 +10811,11 @@ def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16);
   # // With NAN inputs this is not the same operation as <=
   # // D0 = VCC in VOPC encoding.
@@ -9851,6 +10825,7 @@ def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16)
   # --- end pseudocode ---
@@ -9858,9 +10833,11 @@ def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -9870,6 +10847,7 @@ def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16)
   # --- end pseudocode ---
@@ -9877,9 +10855,11 @@ def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16);
   # // With NAN inputs this is not the same operation as !=
@@ -9890,6 +10870,7 @@ def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16)
   # --- end pseudocode ---
@@ -9897,9 +10878,11 @@ def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16);
   # // With NAN inputs this is not the same operation as >=
@@ -9910,6 +10893,7 @@ def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16)
   # --- end pseudocode ---
@@ -9917,9 +10901,11 @@ def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
@@ -9927,6 +10913,7 @@ def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 1
   # --- end pseudocode ---
@@ -9934,24 +10921,29 @@ def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 0
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f32 < S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -9960,15 +10952,18 @@ def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 < S1.f32
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f32 == S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -9977,15 +10972,18 @@ def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 == S1.f32
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f32 <= S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -9993,15 +10991,18 @@ def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 <= S1.f32
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.f32 > S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -10010,15 +11011,18 @@ def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 > S1.f32
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f32 <> S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10026,15 +11030,18 @@ def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 != S1.f32
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f32 >= S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10042,15 +11049,18 @@ def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f32 >= S1.f32
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC
   # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
@@ -10059,15 +11069,18 @@ def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
@@ -10076,15 +11089,18 @@ def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 >= S1.f32);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -10093,15 +11109,18 @@ def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 >= S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 <> S1.f32);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -10110,15 +11129,18 @@ def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 != S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = !(S0.f32 > S1.f32);
   # // With NAN inputs this is not the same operation as <=
@@ -10128,15 +11150,18 @@ def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 > S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 <= S1.f32);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -10145,15 +11170,18 @@ def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 <= S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f32 == S1.f32);
   # // With NAN inputs this is not the same operation as !=
@@ -10163,15 +11191,18 @@ def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 == S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not less than the second input.
Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -10181,30 +11212,36 @@ def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -10212,6 +11249,7 @@ def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -10219,9 +11257,11 @@ def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. 
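
The V_CMPX_* variants below write the per-lane condition bit to the EXEC mask as well as to D0, and result['exec'] is emitted only when the mask actually changed. A sketch of the single-lane mask update, assuming 64-bit masks (set_lane is a hypothetical helper, not part of pcode.py):

MASK64 = (1 << 64) - 1

def set_lane(mask: int, lane: int, cond: bool) -> int:
    # set or clear one bit of a 64-bit execution mask
    bit = 1 << lane
    return (mask | bit) if cond else (mask & ~bit & MASK64)

m = set_lane(0, lane=5, cond=True)   # -> 0b100000
m = set_lane(m, lane=5, cond=False)  # -> 0
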
@@ -10231,6 +11271,7 @@ def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- @@ -10238,9 +11279,11 @@ def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -10250,6 +11293,7 @@ def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- @@ -10257,9 +11301,11 @@ def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10268,6 +11314,7 @@ def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- @@ -10275,9 +11322,11 @@ def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. 
@@ -10287,6 +11336,7 @@ def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- @@ -10294,9 +11344,11 @@ def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10305,6 +11357,7 @@ def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- @@ -10312,9 +11365,11 @@ def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10323,6 +11378,7 @@ def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- @@ -10330,9 +11386,11 @@ def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. 
@@ -10342,6 +11400,7 @@ def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- @@ -10349,9 +11408,11 @@ def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10360,6 +11421,7 @@ def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- @@ -10367,9 +11429,11 @@ def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -10379,6 +11443,7 @@ def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- @@ -10386,9 +11451,11 @@ def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
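
The NGE/NGT/NLE/NLT/NEQ/NLG family is compiled as a negation of the base comparison rather than as the flipped comparison because, as the inline comments note, the two differ once a NaN is involved. For example, NGE and LT disagree on NaN input:

import math

a, b = math.nan, 1.0
assert not (a >= b)  # NGE: the condition bit is set for NaN input
assert not (a < b)   # LT: the condition bit stays clear
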
@@ -10398,6 +11465,7 @@ def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- @@ -10405,9 +11473,11 @@ def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. @@ -10417,6 +11487,7 @@ def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- @@ -10424,9 +11495,11 @@ def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -10436,6 +11509,7 @@ def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- @@ -10443,9 +11517,11 @@ def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. 
# EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -10456,6 +11532,7 @@ def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- @@ -10463,9 +11540,11 @@ def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -10476,6 +11555,7 @@ def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- @@ -10483,9 +11563,11 @@ def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -10493,6 +11575,7 @@ def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -10500,24 +11583,29 @@ def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
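
When the compiled body touches VCC, only the current lane's bit is handed back through vcc_lane; extracting one lane from the 64-bit mask is a shift-and-mask:

vcc = 0b1010
lane = 3
assert (vcc >> lane) & 1 == 1  # lane 3 of the mask is set
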
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -10526,15 +11614,18 @@ def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -10543,15 +11634,18 @@ def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10559,15 +11653,18 @@ def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -10576,15 +11673,18 @@ def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10592,15 +11692,18 @@ def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10608,15 +11711,18 @@ def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
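
V_CMP_O ("orderable") and V_CMP_U are complements: O is true iff neither input is NaN, U iff at least one input is NaN. A sketch of the two predicates as compiled, assuming isNAN maps to math.isnan:

import math

def cmp_o(x: float, y: float) -> bool: return not math.isnan(x) and not math.isnan(y)
def cmp_u(x: float, y: float) -> bool: return math.isnan(x) or math.isnan(y)

assert cmp_o(1.0, 2.0) and not cmp_u(1.0, 2.0)
assert cmp_u(math.nan, 2.0) and not cmp_o(math.nan, 2.0)
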
@@ -10625,15 +11731,18 @@ def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -10642,15 +11751,18 @@ def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -10659,15 +11771,18 @@ def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -10676,15 +11791,18 @@ def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -10694,15 +11812,18 @@ def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -10711,15 +11832,18 @@ def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -10729,15 +11853,18 @@ def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -10747,30 +11874,36 @@ def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -10778,6 +11911,7 @@ def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -10785,9 +11919,11 @@ def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. 
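
Note that the result dict is sparse: 'vcc_lane' and 'exec' appear only when the handler actually changed those values, so the emulator's commit step can skip unchanged state. The pattern in isolation (hypothetical helper name):

def sparse_result(d0, scc, lane, vcc_in, vcc_out, exec_in, exec_out):
    result = {'d0': d0, 'scc': scc & 1}
    if vcc_out != vcc_in: result['vcc_lane'] = (vcc_out >> lane) & 1
    if exec_out != exec_in: result['exec'] = exec_out
    return result
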
@@ -10797,6 +11933,7 @@ def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -10804,9 +11941,11 @@ def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -10816,6 +11955,7 @@ def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -10823,9 +11963,11 @@ def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10834,6 +11976,7 @@ def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -10841,9 +11984,11 @@ def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. 
@@ -10853,6 +11998,7 @@ def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -10860,9 +12006,11 @@ def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10871,6 +12019,7 @@ def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -10878,9 +12027,11 @@ def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10889,6 +12040,7 @@ def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -10896,9 +12048,11 @@ def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
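
The f32 comparisons test isNAN on the operand widened to f64 (64'F(S0.f32)) while the f64 ones test the operand directly; the widening is harmless because converting a float32 NaN to float64 still yields a NaN. A quick check on raw IEEE-754 bits:

import math, struct

bits = 0x7FC00000  # a quiet float32 NaN
val = struct.unpack('<f', struct.pack('<I', bits))[0]  # unpacked as a Python (64-bit) float
assert math.isnan(val)
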
@@ -10908,6 +12062,7 @@ def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -10915,9 +12070,11 @@ def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10926,6 +12083,7 @@ def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -10933,9 +12091,11 @@ def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -10945,6 +12105,7 @@ def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -10952,9 +12113,11 @@ def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
@@ -10964,6 +12127,7 @@ def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -10971,9 +12135,11 @@ def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. @@ -10983,6 +12149,7 @@ def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -10990,9 +12157,11 @@ def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -11002,6 +12171,7 @@ def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -11009,9 +12179,11 @@ def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. 
# EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -11022,6 +12194,7 @@ def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -11029,9 +12202,11 @@ def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -11042,6 +12217,7 @@ def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -11049,9 +12225,11 @@ def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -11059,6 +12237,7 @@ def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -11066,24 +12245,29 @@ def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
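
With the pc parameter threaded through, every handler in this family now also reports new_pc. A hedged usage sketch against _VOP3AOp_V_CMP_F_F64 above (argument values are illustrative; Reg semantics come from extra/assembly/amd/pcode.py):

res = _VOP3AOp_V_CMP_F_F64(s0=0, s1=0, s2=0, d0=0, scc=0, vcc=0, lane=0,
                           exec_mask=(1 << 64) - 1, literal=0, VGPR=None,
                           _vars={}, pc=0x1234)
assert res['new_pc'] == 0x1234 and res['d0_64'] is True
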
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -11092,15 +12276,18 @@ def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. @@ -11109,15 +12296,18 @@ def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11125,15 +12315,18 @@ def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -11142,15 +12335,18 @@ def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. @@ -11159,15 +12355,18 @@ def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11175,45 +12374,54 @@ def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. 
# D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -11222,15 +12430,18 @@ def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. @@ -11239,15 +12450,18 @@ def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11255,15 +12469,18 @@ def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -11272,15 +12489,18 @@ def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. @@ -11289,15 +12509,18 @@ def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11305,30 +12528,36 @@ def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. 
Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -11336,6 +12565,7 @@ def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -11343,9 +12573,11 @@ def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -11355,6 +12587,7 @@ def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -11362,9 +12595,11 @@ def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. @@ -11374,6 +12609,7 @@ def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -11381,9 +12617,11 @@ def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. 
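Note: `D0.u64[laneId] = ...` in these bodies is a single-bit write into a 64-bit per-lane mask. A plain-int model of that bit-slice assignment, under the assumption that Reg keeps the mask in `_val` (the names below are illustrative, not the pcode.py implementation):

def set_lane_bit(mask: int, lane: int, cond: bool) -> int:
    # clear this lane's bit, then OR in the new condition bit
    return (mask & ~(1 << lane)) | (int(cond) << lane)

mask = set_lane_bit(0b1010, 0, True)    # -> 0b1011
mask = set_lane_bit(mask, 3, False)     # -> 0b0011
assert mask == 0b0011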
S0 = Reg(s0) @@ -11392,6 +12630,7 @@ def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -11399,9 +12638,11 @@ def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -11411,6 +12652,7 @@ def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -11418,9 +12660,11 @@ def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. @@ -11430,6 +12674,7 @@ def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -11437,9 +12682,11 @@ def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -11448,6 +12695,7 @@ def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -11455,9 +12703,11 @@ def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -11465,6 +12715,7 @@ def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -11472,9 +12723,11 @@ def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -11482,6 +12735,7 @@ def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -11489,9 +12743,11 @@ def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -11501,6 +12757,7 @@ def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -11508,9 +12765,11 @@ def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. @@ -11520,6 +12779,7 @@ def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -11527,9 +12787,11 @@ def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11538,6 +12800,7 @@ def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -11545,9 +12808,11 @@ def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. 
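Note: the V_CMPX_* emitters write the compare bit to both D0 and EXEC, then report deltas only. A behavioral sketch of that reporting convention, assuming vcc and exec_mask arrive as plain integers as in the signatures above (`report_cmpx` is a made-up name for illustration):

def report_cmpx(d0_new, vcc_new, exec_new, scc, vcc_old, exec_old, lane):
    # mirror the generated epilogue: d0 and scc are always present,
    # vcc_lane and exec only appear when the instruction changed them
    result = {'d0': d0_new, 'scc': scc & 1, 'd0_64': True}
    if vcc_new != vcc_old: result['vcc_lane'] = (vcc_new >> lane) & 1
    if exec_new != exec_old: result['exec'] = exec_new
    return result

assert 'exec' not in report_cmpx(1, 0, 0xF, 0, 0, 0xF, 0)   # EXEC unchanged -> not reported
assert report_cmpx(1, 0b10, 0x7, 0, 0, 0xF, 1) == {'d0': 1, 'scc': 0, 'd0_64': True, 'vcc_lane': 1, 'exec': 0x7}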
@@ -11557,6 +12822,7 @@ def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -11564,9 +12830,11 @@ def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. @@ -11576,6 +12844,7 @@ def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -11583,9 +12852,11 @@ def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11594,6 +12865,7 @@ def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -11601,9 +12873,11 @@ def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
@@ -11611,6 +12885,7 @@ def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -11618,24 +12893,29 @@ def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -11644,15 +12924,18 @@ def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -11661,15 +12944,18 @@ def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11677,15 +12963,18 @@ def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -11694,15 +12983,18 @@ def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
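Note: the ISA pseudocode above spells not-equal as `<>` while the compiled body uses Python's `!=`, so the compiler evidently lowers that token. One plausible sketch of such a rewrite, offered as an assumption about the pass rather than the actual pcode.py code:

def lower_ne(pseudocode: str) -> str:
    # '<>' only ever appears as the not-equal operator in this pseudocode,
    # so a plain textual substitution suffices for the sketch
    return pseudocode.replace("<>", "!=")

assert lower_ne("D0.u64[laneId] = S0.i32 <> S1.i32") == "D0.u64[laneId] = S0.i32 != S1.i32"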
@@ -11711,15 +13003,18 @@ def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11727,45 +13022,54 @@ def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -11774,15 +13078,18 @@ def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -11791,15 +13098,18 @@ def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11807,15 +13117,18 @@ def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -11824,15 +13137,18 @@ def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -11841,15 +13157,18 @@ def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11857,30 +13176,36 @@ def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
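Note: the I32 and U32 families differ only in which view of the 32-bit value the comparison uses. A standalone model of the two views (standard two's-complement semantics, independent of pcode.py):

def as_u32(v: int) -> int:
    return v & 0xFFFFFFFF

def as_i32(v: int) -> int:
    u = v & 0xFFFFFFFF
    return u - 0x100000000 if u & 0x80000000 else u

a, b = 0xFFFFFFFF, 1
assert not (as_u32(a) < as_u32(b))   # unsigned: 4294967295 > 1
assert as_i32(a) < as_i32(b)         # signed: -1 < 1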
@@ -11888,6 +13213,7 @@ def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -11895,9 +13221,11 @@ def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -11907,6 +13235,7 @@ def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -11914,9 +13243,11 @@ def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. @@ -11926,6 +13257,7 @@ def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -11933,9 +13265,11 @@ def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -11944,6 +13278,7 @@ def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -11951,9 +13286,11 @@ def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -11963,6 +13300,7 @@ def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -11970,9 +13308,11 @@ def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. @@ -11982,6 +13322,7 @@ def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -11989,9 +13330,11 @@ def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -12000,6 +13343,7 @@ def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -12007,9 +13351,11 @@ def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -12017,6 +13363,7 @@ def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -12024,9 +13371,11 @@ def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -12034,6 +13383,7 @@ def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -12041,9 +13391,11 @@ def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -12053,6 +13405,7 @@ def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -12060,9 +13413,11 @@ def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -12072,6 +13427,7 @@ def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -12079,9 +13435,11 @@ def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12090,6 +13448,7 @@ def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -12097,9 +13456,11 @@ def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -12109,6 +13470,7 @@ def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -12116,9 +13478,11 @@ def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -12128,6 +13492,7 @@ def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -12135,9 +13500,11 @@ def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12146,6 +13513,7 @@ def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -12153,9 +13521,11 @@ def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
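Note: `// D0 = VCC in VOPC encoding` means the compare destination aliases VCC, so a caller usually wants just the bit this lane produced; the emitters surface it as result['vcc_lane']. The extraction is the one-liner below:

def vcc_lane_bit(vcc: int, lane: int) -> int:
    # pick this lane's bit out of the 64-bit VCC mask
    return (vcc >> lane) & 1

assert vcc_lane_bit(0b1000, 3) == 1
assert vcc_lane_bit(0b1000, 2) == 0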
@@ -12163,6 +13533,7 @@ def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -12170,24 +13541,29 @@ def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -12196,15 +13572,18 @@ def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -12213,15 +13592,18 @@ def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12229,15 +13611,18 @@ def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -12246,15 +13631,18 @@ def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -12263,15 +13651,18 @@ def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12279,45 +13670,54 @@ def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -12326,15 +13726,18 @@ def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -12343,15 +13746,18 @@ def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12359,15 +13765,18 @@ def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -12376,15 +13785,18 @@ def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -12393,15 +13805,18 @@ def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12409,30 +13824,36 @@ def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
@@ -12440,6 +13861,7 @@ def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -12447,9 +13869,11 @@ def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -12459,6 +13883,7 @@ def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -12466,9 +13891,11 @@ def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -12478,6 +13905,7 @@ def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -12485,9 +13913,11 @@ def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -12496,6 +13926,7 @@ def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -12503,9 +13934,11 @@ def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -12515,6 +13948,7 @@ def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -12522,9 +13956,11 @@ def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -12534,6 +13970,7 @@ def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -12541,9 +13978,11 @@ def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -12552,6 +13991,7 @@ def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -12559,9 +13999,11 @@ def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -12569,6 +14011,7 @@ def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -12576,9 +14019,11 @@ def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -12586,6 +14031,7 @@ def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -12593,9 +14039,11 @@ def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -12605,6 +14053,7 @@ def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -12612,9 +14061,11 @@ def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -12624,6 +14075,7 @@ def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- @@ -12631,9 +14083,11 @@ def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12642,6 +14096,7 @@ def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- @@ -12649,9 +14104,11 @@ def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -12661,6 +14118,7 @@ def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- @@ -12668,9 +14126,11 @@ def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -12680,6 +14140,7 @@ def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- @@ -12687,9 +14148,11 @@ def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12698,6 +14161,7 @@ def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- @@ -12705,9 +14169,11 @@ def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
@@ -12725,6 +14191,7 @@ def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC = Reg(exec_mask) tmp = Reg(0) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) @@ -12741,9 +14208,11 @@ def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -12753,7 +14222,7 @@ def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'I; # if EXEC == 0x0LL then # lane = 0; @@ -12777,7 +14246,7 @@ def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -12787,7 +14256,7 @@ def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -12798,7 +14267,7 @@ def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -12808,7 +14277,7 @@ def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12818,7 +14287,7 @@ def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -12828,7 +14297,7 @@ def _VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -12838,7 +14307,7 @@ def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -12848,7 +14317,7 @@ def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -12858,7 +14327,7 @@ def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -12868,7 +14337,7 @@ def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -12878,7 +14347,7 @@ def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -12888,7 +14357,7 @@ def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -12899,7 +14368,7 @@ def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return 
result -def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12909,7 +14378,7 @@ def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12919,7 +14388,7 @@ def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12929,7 +14398,7 @@ def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12939,7 +14408,7 @@ def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -12949,7 +14418,7 @@ def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12960,7 +14429,7 @@ def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -12971,7 +14440,7 @@ def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ 
-12987,7 +14456,7 @@ def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -13003,7 +14472,7 @@ def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -13019,7 +14488,7 @@ def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13029,7 +14498,7 @@ def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13039,7 +14508,7 @@ def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -13054,7 +14523,7 @@ def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -13069,7 +14538,7 @@ def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -13084,7 +14553,7 @@ def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13094,7 +14563,7 @@ def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13104,7 +14573,7 @@ def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -13114,7 +14583,7 @@ def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -13125,7 +14594,7 @@ def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13135,7 +14604,7 @@ def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -13146,7 +14615,7 @@ def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -13157,7 +14626,7 @@ def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13167,7 +14636,7 @@ def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def 
_VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -13178,7 +14647,7 @@ def _VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -13188,7 +14657,7 @@ def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -13198,7 +14667,7 @@ def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -13208,7 +14677,7 @@ def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -13218,7 +14687,7 @@ def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -13238,7 +14707,7 @@ def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -13258,7 +14727,7 @@ def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -13278,7 +14747,7 @@ def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -13295,7 +14764,7 @@ def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -13313,7 +14782,7 @@ def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -13324,7 +14793,7 @@ def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -13341,7 +14810,7 @@ def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -13358,7 +14827,7 @@ def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b64 = S0.b64 S0 = Reg(s0) D0 = Reg(d0) @@ -13369,7 +14838,7 @@ def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -13379,7 +14848,7 @@ def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -13389,7 +14858,7 @@ def _VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13399,7 +14868,7 @@ def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13409,7 +14878,7 @@ def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -13419,7 +14888,7 @@ def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13429,7 +14898,7 @@ def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13439,7 +14908,7 @@ def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13449,7 +14918,7 @@ def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13459,7 +14928,7 @@ def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -13473,7 +14942,7 @@ def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -13484,7 +14953,7 @@ def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -13495,7 +14964,7 @@ def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -13506,7 +14975,7 @@ def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, D0.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -13518,7 +14987,7 @@ def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -13529,7 +14998,7 @@ def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -13540,7 +15009,7 @@ def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -13551,7 +15020,7 @@ def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, 
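
# (Aside, illustrative only: each handler above returns a small result dict
# rather than writing machine state directly. A hypothetical sketch -- not the
# actual emu.py loop -- of how such a dict could be applied per lane: 'd0' is
# the destination value, 'scc' the scalar condition code, and 'vcc_lane' this
# lane's VCC bit, reported by handlers such as V_CNDMASK_B32 when VCC changes.)
def _apply_result_sketch(result: dict, lane: int, state: dict) -> None:
  width = 0xffffffffffffffff if result.get('d0_64') else 0xffffffff
  state['d0'] = result['d0'] & width
  state['scc'] = result['scc'] & 1
  if 'vcc_lane' in result:  # splice this lane's bit into the 64-bit VCC mask
    state['vcc'] = (state['vcc'] & ~(1 << lane)) | (result['vcc_lane'] << lane)
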
exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -13562,7 +15031,7 @@ def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -13573,7 +15042,7 @@ def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then @@ -13611,7 +15080,7 @@ def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then @@ -13653,7 +15122,7 @@ def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -13664,7 +15133,7 @@ def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -13675,7 +15144,7 @@ def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? 
S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -13686,7 +15155,7 @@ def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -13697,7 +15166,7 @@ def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13708,7 +15177,7 @@ def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13719,7 +15188,7 @@ def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13730,7 +15199,7 @@ def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13741,7 +15210,7 @@ def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13752,7 +15221,7 @@ def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13763,7 +15232,7 @@ def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3AOp_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -13781,7 +15250,7 @@ def _VOP3AOp_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -13792,7 +15261,7 @@ def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -13803,7 +15272,7 @@ def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -13814,7 +15283,7 @@ def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -13825,7 +15294,7 @@ def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * S1.f16 + D0.f16; # if OPSEL.u4[3] then # D0 = { tmp.f16, D0[15 : 0] } @@ -13846,7 +15315,7 @@ def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 + S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -13857,7 +15326,7 @@ def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 - S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -13868,7 +15337,7 @@ def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S1.u16 - S0.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -13879,7 +15348,7 @@ def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -13890,7 +15359,7 @@ def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13901,7 +15370,7 @@ def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13912,7 +15381,7 @@ def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13923,7 +15392,7 @@ def _VOP3AOp_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then @@ -13965,7 +15434,7 @@ def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then @@ -14003,7 +15472,7 @@ def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -14014,7 +15483,7 @@ def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -14025,7 +15494,7 @@ def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -14036,7 +15505,7 @@ def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -14047,7 +15516,7 @@ def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -14058,7 +15527,7 @@ def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14069,7 +15538,7 @@ def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14080,7 +15549,7 @@ def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14091,7 +15560,7 @@ def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': 
D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -14109,7 +15578,7 @@ def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); @@ -14127,7 +15596,7 @@ def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); @@ -14149,7 +15618,7 @@ def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); @@ -14179,7 +15648,7 @@ def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -14190,7 +15659,7 @@ def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) S0 = Reg(s0) @@ -14203,7 +15672,7 @@ def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ 
-14214,7 +15683,7 @@ def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -14226,7 +15695,7 @@ def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14238,7 +15707,7 @@ def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -14287,7 +15756,7 @@ def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap S coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -14329,7 +15798,7 @@ def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap T coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -14364,7 +15833,7 @@ def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = 2.0 * cubemap major axis. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
# // S0.f = x @@ -14392,7 +15861,7 @@ def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) S0 = Reg(s0) S1 = Reg(s1) @@ -14404,7 +15873,7 @@ def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) S0 = Reg(s0) @@ -14419,7 +15888,7 @@ def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) S0 = Reg(s0) S1 = Reg(s1) @@ -14431,7 +15900,7 @@ def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -14443,7 +15912,7 @@ def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, S2.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -14456,7 +15925,7 @@ def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); @@ -14477,7 +15946,7 @@ def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -14489,7 +15958,7 @@ def 
_VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -14501,7 +15970,7 @@ def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -14513,7 +15982,7 @@ def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -14525,7 +15994,7 @@ def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -14537,7 +16006,7 @@ def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -14549,7 +16018,7 @@ def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -14561,7 +16030,7 @@ def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -14573,7 +16042,7 @@ def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then @@ -14600,7 +16069,7 @@ def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then # D0.i32 = v_max_i32(S1.i32, S2.i32) # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then @@ -14623,7 +16092,7 @@ def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then # D0.u32 = v_max_u32(S1.u32, S2.u32) # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then @@ -14646,7 +16115,7 @@ def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -14670,7 +16139,7 @@ def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14682,7 +16151,7 @@ def _VOP3AOp_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); @@ -14702,7 +16171,7 @@ def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 S0 = Reg(s0) @@ -14715,7 +16184,7 @@ def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result 
-def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); # D0.u32 = tmp @@ -14732,7 +16201,7 @@ def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f32) ^ sign(S2.f32)); # if isNAN(64'F(S2.f32)) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) @@ -14785,7 +16254,7 @@ def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f64) ^ sign(S2.f64)); # if isNAN(S2.f64) then # D0.f64 = cvtToQuietNAN(S2.f64) @@ -14839,7 +16308,7 @@ def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) # else @@ -14861,7 +16330,7 @@ def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) # else @@ -14884,7 +16353,7 @@ def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += S1.u32[7 : 0] == 8'0U ? 
0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -14908,7 +16377,7 @@ def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -14930,7 +16399,7 @@ def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -14952,7 +16421,7 @@ def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); @@ -14973,7 +16442,7 @@ def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * S1.f16 + S2.f16; # if OPSEL.u4[3] then # D0 = { tmp.f16, D0[15 : 0] } @@ -14995,7 +16464,7 @@ def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u16 * S1.u16 + S2.u16; # if OPSEL.u4[3] then # D0 = { tmp.u16, D0[15 : 0] } @@ -15017,7 +16486,7 @@ def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i16 * S1.i16 + S2.i16; # if OPSEL.u4[3] then # D0 = { tmp.i16, D0[15 : 0] } @@ -15039,7 +16508,25 @@ def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return 
result
 
-def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]);
+  # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]);
+  # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]);
+  # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0])
+  S0 = Reg(s0)
+  S1 = Reg(s1)
+  S2 = Reg(s2)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24])
+  D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16])
+  D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8])
+  D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0])
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = fma(S0.f16, S1.f16, S2.f16);
   # if OPSEL.u4[3] then
   # D0 = { tmp.f16, D0[15 : 0] }
@@ -15061,7 +16548,7 @@ def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # sign_out = (sign(S1.f16) ^ sign(S2.f16));
   # if isNAN(64'F(S2.f16)) then
   # tmp = cvtToQuietNAN(64'F(S2.f16))
@@ -15116,7 +16603,7 @@ def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # byte = S1.u32[1 : 0];
   # bit = byte.u32 * 8U;
   # D0.u32[bit + 7U : bit] = 32'U(f32_to_u8(S0.f32))
@@ -15131,7 +16618,7 @@ def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15143,7 +16630,7 @@ def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15155,7 +16642,7 @@ def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars,
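
# (Aside, illustrative only: the v_perm_b32 fix above routes the byte selects
# through _pack32/BYTE_PERMUTE, helpers presumably provided by pcode.py. A
# rough sketch of their semantics under that assumption; the real BYTE_PERMUTE
# must also handle the ISA's special selector values >= 8, omitted here:)
def _pack32_sketch(hi32: int, lo32: int) -> int:
  # { S0.u32, S1.u32 } concatenation: S0 is the high dword, S1 the low dword
  return ((hi32 & 0xffffffff) << 32) | (lo32 & 0xffffffff)

def _byte_permute_sketch(data64: int, sel: int) -> int:
  # selectors 0-7 pick byte `sel` of the 64-bit input, byte 0 = least significant
  return (data64 >> (8 * (sel & 7))) & 0xff

# e.g. _byte_permute_sketch(_pack32_sketch(0x11223344, 0x55667788), 4) == 0x44
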
src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15167,7 +16654,7 @@ def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -15179,7 +16666,7 @@ def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -15191,7 +16678,7 @@ def _VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -15203,7 +16690,7 @@ def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -15215,7 +16702,7 @@ def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -15227,7 +16714,7 @@ def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -15239,7 +16726,7 @@ def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then @@ -15266,7 
+16753,7 @@ def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then # D0.i16 = v_max_i16(S1.i16, S2.i16) # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then @@ -15289,7 +16776,7 @@ def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then # D0.u16 = v_max_u16(S1.u16, S2.u16) # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then @@ -15312,7 +16799,7 @@ def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15324,7 +16811,7 @@ def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -15336,7 +16823,7 @@ def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15348,7 +16835,7 @@ def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -15360,7 +16847,7 @@ def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -15372,7 +16859,7 @@ def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG 
result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32 | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -15384,7 +16871,7 @@ def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 + S2.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -15396,7 +16883,7 @@ def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 + S2.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15408,7 +16895,7 @@ def _VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 * S1.i16 + S2.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15420,7 +16907,7 @@ def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -15432,7 +16919,7 @@ def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f16) ^ sign(S2.f16)); # if isNAN(64'F(S2.f16)) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) @@ -15477,7 +16964,7 @@ def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15490,7 +16977,7 @@ def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): # tmp = 16'0U; # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 16'U(~S0.b16 & ~S1.b16 & ~S2.b16) : 16'0U)); # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 16'U(~S0.b16 & ~S1.b16 & S2.b16) : 16'0U)); @@ -15518,7 +17005,7 @@ def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 32'U(~S0.b32 & ~S1.b32 & ~S2.b32) : 0U)); # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 32'U(~S0.b32 & ~S1.b32 & S2.b32) : 0U)); @@ -15546,7 +17033,7 @@ def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp0 = f32_to_fp8_scale(S0.f32, scale.u8); # tmp1 = f32_to_fp8_scale(S1.f32, scale.u8); @@ -15565,7 +17052,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp0 = f32_to_bf8_scale(S0.f32, scale.u8); # tmp1 = f32_to_bf8_scale(S1.f32, scale.u8); @@ -15584,7 +17071,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = f32_to_fp8_sr_scale(S0.f32, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15601,7 +17088,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = f32_to_bf8_sr_scale(S0.f32, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15618,7 +17105,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -15639,7 +17126,7 @@ def 
_VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -15660,7 +17147,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; @@ -15678,7 +17165,7 @@ def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; @@ -15696,7 +17183,7 @@ def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp0 = f32_to_fp4_scale(S0.f32, scale.u8); # tmp1 = f32_to_fp4_scale(S1.f32, scale.u8); @@ -15715,7 +17202,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # tmp0 = f32_to_fp4_sr_scale(S0[31 : 0].f32, randomVal, scale.u8); @@ -15736,7 +17223,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; @@ -15757,7 +17244,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = f16_to_fp8_scale(S0[15 : 0].f16, scale.u8); # tmp1 = f16_to_fp8_scale(S0[31 : 16].f16, scale.u8); @@ -15775,7 +17262,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = f16_to_bf8_scale(S0[15 : 0].f16, scale.u8); # tmp1 = f16_to_bf8_scale(S0[31 : 16].f16, scale.u8); @@ -15793,7 +17280,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = f16_to_fp8_sr_scale(S0.f16, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15810,7 +17297,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = f16_to_bf8_sr_scale(S0.f16, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15827,7 +17314,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = bf16_to_fp8_scale(S0[15 : 0].bf16, scale.u8); # tmp1 = bf16_to_fp8_scale(S0[31 : 16].bf16, scale.u8); @@ -15845,7 +17332,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = bf16_to_bf8_scale(S0[15 : 0].bf16, scale.u8); # tmp1 = bf16_to_bf8_scale(S0[31 : 16].bf16, scale.u8); @@ -15863,7 +17350,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = bf16_to_fp8_sr_scale(S0.bf16, 
S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15880,7 +17367,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = bf16_to_bf8_sr_scale(S0.bf16, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15897,7 +17384,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -15918,7 +17405,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -15939,7 +17426,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; @@ -15958,7 +17445,7 @@ def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; @@ -15977,7 +17464,7 @@ def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = f16_to_fp4_scale(S0[15 : 0].f16, scale.u8); # tmp1 = f16_to_fp4_scale(S0[31 : 16].f16, scale.u8); @@ -15995,7 +17482,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = bf16_to_fp4_scale(S0[15 : 0].bf16, scale.u8); # tmp1 = bf16_to_fp4_scale(S0[31 : 16].bf16, scale.u8); @@ -16013,7 +17500,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # tmp0 = f16_to_fp4_sr_scale(S0[15 : 0].f16, randomVal, scale.u8); @@ -16034,7 +17521,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # tmp0 = bf16_to_fp4_sr_scale(S0[15 : 0].bf16, randomVal, scale.u8); @@ -16055,7 +17542,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_ result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; @@ -16076,7 +17563,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; @@ -16097,7 +17584,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # declare tmp : 192'B; # for pass in 0 : 15 do @@ -16121,7 +17608,7 @@ def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # 
declare tmp : 192'B; # for pass in 0 : 15 do @@ -16145,7 +17632,7 @@ def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16167,7 +17654,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16189,7 +17676,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 1024'B; # for pass in 0 : 31 do @@ -16209,7 +17696,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 1024'B; # for pass in 0 : 31 do @@ -16229,7 +17716,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 192'B; # for pass in 0 : 31 do @@ -16249,7 +17736,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 192'B; # for pass in 0 : 31 do @@ -16269,7 +17756,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): 
# scale = 32'U(exponent(S1.f32)); # declare tmp : 192'B; # for pass in 0 : 31 do @@ -16289,7 +17776,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16311,7 +17798,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16333,7 +17820,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exe result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16355,7 +17842,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16377,7 +17864,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exe result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 512'B; # for pass in 0 : 31 do @@ -16397,7 +17884,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 512'B; # for pass in 0 : 31 do @@ -16417,7 +17904,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 512'B; # for pass in 0 : 31 do @@ -16437,7 +17924,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 512'B; # for pass in 0 : 31 do @@ -16457,11 +17944,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # if n <= -128 then - # elsif n >= 127 then - # else - # endif); +def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 16'B; # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32); # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32); @@ -16472,12 +17955,6 @@ def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal D0 = Reg(d0) tmp = Reg(0) # --- compiled pseudocode --- - if n <= -128: - pass - elif n >= 127: - pass - else: - pass tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32) tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32) D0[15 : 0] = tmp @@ -16485,11 +17962,7 @@ def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # if n <= 0 then - # elsif n >= 255 then - # else - # endif); +def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 16'B; # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32); # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32); @@ -16500,12 +17973,6 @@ def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal D0 = Reg(d0) tmp = Reg(0) # --- compiled pseudocode --- - if n <= 0: - pass - elif n >= 255: - pass - else: - pass tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32) tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32) D0[15 : 0] = tmp @@ -16513,7 +17980,7 @@ def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -16528,7 +17995,7 @@ def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PK_BF16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PK_BF16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].bf16 = f32_to_bf16(S0.f32); # tmp[31 : 16].bf16 = f32_to_bf16(S1.f32); @@ -16543,7 +18010,7 @@ def 
_VOP3AOp_V_CVT_PK_BF16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -16564,7 +18031,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -16585,7 +18052,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -16597,7 +18064,7 @@ def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -16609,7 +18076,7 @@ def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then # D0.f64 = cvtToQuietNAN(S0.f64) # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then @@ -16648,7 +18115,7 @@ def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then # D0.f64 = cvtToQuietNAN(S0.f64) # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then @@ -16691,7 +18158,7 @@ def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * 2.0 ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -16703,7 +18170,7 @@ def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result 
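The f64 min/max hunks above hinge on the isSignalNAN test under WAVE_MODE.IEEE: a signalling NaN operand is quieted before the compare. A sketch of that predicate, assuming the standard IEEE 754 binary64 encoding (all-ones exponent, quiet bit 51 clear, nonzero payload); the helper name is illustrative, not the emulator's:

import struct

def is_signal_nan_f64(x: float) -> bool:
    # sNaN: exponent all ones, quiet bit (bit 51) clear, payload nonzero
    bits = struct.unpack('<Q', struct.pack('<d', x))[0]
    exp = (bits >> 52) & 0x7FF
    mantissa = bits & ((1 << 52) - 1)
    return exp == 0x7FF and mantissa != 0 and not (bits >> 51) & 1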
-def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 * S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -16714,7 +18181,7 @@ def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -16725,7 +18192,7 @@ def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -16736,7 +18203,7 @@ def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * 2.0F ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -16747,7 +18214,7 @@ def _VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # lane = S1.u32[5 : 0]; # // Lane select # D0.b32 = VGPR[lane][SRC0.u32] @@ -16761,7 +18228,7 @@ def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32; # for i in 0 : 31 do # tmp += S0[i].u32; @@ -16781,7 +18248,7 @@ def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 << S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -16793,7 +18260,7 @@ def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = 
Reg(s1) @@ -16805,7 +18272,7 @@ def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (S1.i64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -16817,7 +18284,7 @@ def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -16828,7 +18295,7 @@ def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); @@ -16842,7 +18309,7 @@ def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); @@ -16856,7 +18323,7 @@ def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -16871,7 +18338,7 @@ def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = u32_to_u16(S0.u32); # tmp[31 : 16].u16 = u32_to_u16(S1.u32); @@ -16885,7 +18352,7 @@ def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = i32_to_i16(S0.i32); # tmp[31 : 16].i16 = i32_to_i16(S1.i32); @@ -16899,7 +18366,7 @@ def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); @@ -16913,7 +18380,7 @@ def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); @@ -16927,7 +18394,7 @@ def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 + S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -16938,7 +18405,7 @@ def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -16949,7 +18416,7 @@ def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 + S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -16960,7 +18427,7 @@ def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 - S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -16971,7 +18438,7 @@ def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[31 : 16].f16 = S1.f16; # D0[15 : 0].f16 = S0.f16 S0 = Reg(s0) @@ -16984,7 +18451,7 @@ def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -17003,7 +18470,7 @@ def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 32'F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)) S0 = Reg(s0) S1 = Reg(s1) @@ -17015,7 +18482,7 @@ def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 32'F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)) S0 = Reg(s0) S1 = Reg(s1) @@ -17377,6 +18844,7 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_MAD_LEGACY_F16: _VOP3AOp_V_MAD_LEGACY_F16, VOP3AOp.V_MAD_LEGACY_U16: _VOP3AOp_V_MAD_LEGACY_U16, VOP3AOp.V_MAD_LEGACY_I16: _VOP3AOp_V_MAD_LEGACY_I16, + VOP3AOp.V_PERM_B32: _VOP3AOp_V_PERM_B32, VOP3AOp.V_FMA_LEGACY_F16: _VOP3AOp_V_FMA_LEGACY_F16, VOP3AOp.V_DIV_FIXUP_LEGACY_F16: _VOP3AOp_V_DIV_FIXUP_LEGACY_F16, VOP3AOp.V_CVT_PKACCUM_U8_F32: _VOP3AOp_V_CVT_PKACCUM_U8_F32, @@ -17490,7 +18958,7 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_MAXIMUM3_F32: _VOP3AOp_V_MAXIMUM3_F32, } -def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. @@ -17510,7 +18978,7 @@ def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -17530,7 +18998,7 @@ def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32; # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. 
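The VOP3AOp_FUNCTIONS entry added above is the headline of this patch: V_PERM_B32 now dispatches to its handler. As a hedged reference for what that instruction computes — following the ISA documentation's selector encoding, not the generated handler — each byte of S2 picks a byte of the 64-bit value {S0, S1}, with selectors 8-11 replicating a sign bit, 12 producing 0x00, and 13-15 producing 0xFF:

def perm_b32(s0, s1, s2):
    # sketch only; selector encoding assumed from the ISA reference
    data = ((s0 & 0xFFFFFFFF) << 32) | (s1 & 0xFFFFFFFF)  # S1 is the low dword
    out = 0
    for i in range(4):
        sel = (s2 >> (8 * i)) & 0xFF
        if sel >= 13:
            byte = 0xFF                                    # constant ones
        elif sel == 12:
            byte = 0x00                                    # constant zero
        elif sel >= 8:
            # replicate bit 15/31/47/63 of the source pair
            byte = 0xFF if (data >> ((sel - 8) * 16 + 15)) & 1 else 0x00
        else:
            byte = (data >> (8 * sel)) & 0xFF              # plain byte pick
        out |= byte << (8 * i)
    return out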
@@ -17550,7 +19018,7 @@ def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. @@ -17570,7 +19038,7 @@ def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -17590,7 +19058,7 @@ def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. 
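The extended-precision VOP3B hunks above share one carry convention: the per-lane VCC bit is the carry/borrow-in, and the new VCC bit is derived from an exact wide sum. A minimal sketch of the v_addc_co_u32 data path — since Python integers are unbounded, the 64-bit overflow compare in the pseudocode is simply tmp >= 2**32:

def addc_co_u32(s0, s1, vcc, lane):
    carry_in = (vcc >> lane) & 1
    tmp = s0 + s1 + carry_in                  # exact; no wraparound in Python
    carry_out = 1 if tmp >= 0x100000000 else 0
    return tmp & 0xFFFFFFFF, carry_out        # (D0, new VCC bit for this lane)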
@@ -17610,7 +19078,7 @@ def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # D0.f32 = NAN.f32 @@ -17673,7 +19141,7 @@ def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then # D0.f64 = NAN.f64 @@ -17737,7 +19205,7 @@ def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) S0 = Reg(s0) S1 = Reg(s1) @@ -17754,7 +19222,7 @@ def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d1'] = D1._val & 1 return result -def _VOP3BOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3BOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) S0 = Reg(s0) S1 = Reg(s1) diff --git a/extra/assembly/amd/autogen/rdna3/gen_pcode.py b/extra/assembly/amd/autogen/rdna3/gen_pcode.py index 8ce42c1cc9..df32416e22 100644 --- a/extra/assembly/amd/autogen/rdna3/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna3/gen_pcode.py @@ -5,7 +5,7 @@ from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -15,7 +15,7 @@ def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b64 = S0.b64 S0 = Reg(s0) D0 = Reg(d0) @@ -26,7 +26,7 @@ def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b32 = S0.b32 # endif @@ 
-40,7 +40,7 @@ def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b64 = S0.b64 # endif @@ -55,7 +55,7 @@ def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -65,7 +65,7 @@ def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[63 : 0] = S0.u64[0 : 63] S0 = Reg(s0) D0 = Reg(d0) @@ -76,7 +76,7 @@ def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -99,7 +99,7 @@ def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -122,7 +122,7 @@ def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -145,7 +145,7 @@ def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -168,7 +168,7 @@ def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the 
same # for i in 1 : 31 do @@ -191,7 +191,7 @@ def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the same # for i in 1 : 63 do @@ -214,7 +214,7 @@ def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i8)) S0 = Reg(s0) D0 = Reg(d0) @@ -224,7 +224,7 @@ def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -234,7 +234,7 @@ def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[S0.u32[4 : 0]] = 1'0U S0 = Reg(s0) D0 = Reg(d0) @@ -244,7 +244,7 @@ def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[S0.u32[5 : 0]] = 1'0U S0 = Reg(s0) D0 = Reg(d0) @@ -255,7 +255,7 @@ def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[S0.u32[4 : 0]] = 1'1U S0 = Reg(s0) D0 = Reg(d0) @@ -265,7 +265,7 @@ def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[S0.u32[5 : 0]] = 1'1U S0 = Reg(s0) D0 = Reg(d0) @@ -276,7 +276,7 @@ def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
_vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32; # for i in 0 : 31 do # D0.u64[i * 2] = tmp[i]; @@ -295,7 +295,7 @@ def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -308,7 +308,7 @@ def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'0U ? 1 : 0 @@ -329,7 +329,7 @@ def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'0U ? 1 : 0 @@ -351,7 +351,7 @@ def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'1U ? 1 : 0 @@ -372,7 +372,7 @@ def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'1U ? 
1 : 0 @@ -394,7 +394,7 @@ def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # for i in 0 : 7 do # tmp[i] = S0.u32[i * 4 +: 4] != 0U @@ -415,7 +415,7 @@ def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0ULL; # for i in 0 : 15 do # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL @@ -437,7 +437,7 @@ def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # declare i : 6'U; # for i in 6'0U : 6'31U do @@ -459,7 +459,7 @@ def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0ULL; # declare i : 6'U; # for i in 6'0U : 6'63U do @@ -482,7 +482,7 @@ def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32; # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -495,7 +495,7 @@ def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~S0.u64; # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -509,7 +509,7 @@ def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -531,7 +531,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -554,7 +554,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u32; @@ -576,7 +576,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u64; @@ -599,7 +599,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -621,7 +621,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -644,7 +644,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated 
result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -666,7 +666,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -689,7 +689,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -711,7 +711,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -734,7 +734,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -756,7 +756,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -779,7 +779,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u32; @@ -801,7 +801,7 @@ def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -824,7 +824,7 @@ def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result['d0_64'] = True return result -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u32; @@ -846,7 +846,7 @@ def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -869,7 +869,7 @@ def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u32; @@ -891,7 +891,7 @@ def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise 
AND on the scalar input and the negation of the EXEC mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -914,7 +914,7 @@ def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result['d0_64'] = True return result -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u32; @@ -936,7 +936,7 @@ def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -959,7 +959,7 @@ def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -979,7 +979,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
This instruction is
@@ -1000,7 +1000,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
  result['d0_64'] = True
  return result

-def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into
  # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op
  # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is
@@ -1020,7 +1020,7 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
  if EXEC._val != exec_mask: result['exec'] = EXEC._val
  return result

-def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into
  # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op
  # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is
@@ -1041,9 +1041,65 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
  result['d0_64'] = True
  return result

-def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0.i64 = PC + 4LL
+  D0 = Reg(d0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  D0.i64 = PC + 4
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc  # absolute byte address
+  return result
+
+def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = S0.i64
+  S0 = Reg(s0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(S0.i64)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc  # absolute byte address
+  return result
+
+def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # jump_addr = S0.i64;
+  # D0.i64 = PC + 4LL;
+  # PC = jump_addr.i64
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  jump_addr = S0.i64
+  D0.i64 = PC + 4
+  PC = Reg(jump_addr.i64)
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc  # absolute byte address
+  return result
+
+def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = S0.i64
+  S0 = Reg(s0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(S0.i64)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc  # absolute byte address
+  return result
+
+def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # If SDST is VCC then VCCZ is undefined.
  VCC = Reg(vcc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
  # --- compiled pseudocode ---
  # --- end pseudocode ---
@@ -1051,9 +1107,10 @@ def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
  return result

-def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # If SDST is VCC then VCCZ is undefined.
  VCC = Reg(vcc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
  # --- compiled pseudocode ---
  # --- end pseudocode ---
@@ -1061,7 +1118,7 @@ def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
  return result

-def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f32 = trunc(S0.f32);
  # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then
  # D0.f32 += 1.0F
@@ -1076,7 +1133,7 @@ def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f32 = trunc(S0.f32);
  # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then
  # D0.f32 += -1.0F
@@ -1091,7 +1148,7 @@ def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f32 = trunc(S0.f32)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1101,7 +1158,7 @@ def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f32 = floor(S0.f32 + 0.5F);
  # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then
  # D0.f32 -= 1.0F
@@ -1116,7 +1173,7 @@ def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f32 = i32_to_f32(S0.i32)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1126,7 +1183,7 @@ def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': D0._val, 'scc': scc & 1}
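# Note on the pc plumbing added in this patch: the four S_*PC_B64 handlers above report
# control flow through result['new_pc'], an absolute byte address. The fold
# `x if x < 0x8000000000000000 else x - 0x10000000000000000` reinterprets the unsigned
# 64-bit Reg value as a signed Python int (e.g. 0xFFFFFFFFFFFFFFFC becomes -4).
# A sketch of how a dispatch loop might consume it, with illustrative names rather than
# the actual emu.py API:
#   res = SOP1Op_FUNCTIONS[op](s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, {}, pc=pc)
#   pc = res['new_pc'] if 'new_pc' in res else pc + inst_size
# S_GETPC_B64 never modifies PC, so its 'new_pc' is just the pc value that was passed in.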
  return result

-def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f32 = u32_to_f32(S0.u32)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1136,7 +1193,7 @@ def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.i32 = f32_to_i32(S0.f32)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1146,7 +1203,7 @@ def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.u32 = f32_to_u32(S0.f32)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1156,7 +1213,7 @@ def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f16 = f32_to_f16(S0.f32)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1166,7 +1223,7 @@ def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f32 = f16_to_f32(S0.f16)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1176,7 +1233,7 @@ def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f32 = f16_to_f32(S0[31 : 16].f16)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1186,7 +1243,7 @@ def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f16 = trunc(S0.f16);
  # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then
  # D0.f16 += 16'1.0
@@ -1201,7 +1258,7 @@ def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f16 = trunc(S0.f16);
  # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then
  # D0.f16 += -16'1.0
@@ -1216,7 +1273,7 @@ def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f16 = trunc(S0.f16)
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -1226,7 +1283,7 @@ def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.f16 = floor(S0.f16 + 16'0.5);
  # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then
  # D0.f16 -= 16'1.0
@@ -1296,6 +1353,10 @@ SOP1Op_FUNCTIONS = {
  SOP1Op.S_AND_NOT0_WREXEC_B64: _SOP1Op_S_AND_NOT0_WREXEC_B64,
  SOP1Op.S_AND_NOT1_WREXEC_B32: _SOP1Op_S_AND_NOT1_WREXEC_B32,
  SOP1Op.S_AND_NOT1_WREXEC_B64: _SOP1Op_S_AND_NOT1_WREXEC_B64,
+  SOP1Op.S_GETPC_B64: _SOP1Op_S_GETPC_B64,
+  SOP1Op.S_SETPC_B64: _SOP1Op_S_SETPC_B64,
+  SOP1Op.S_SWAPPC_B64: _SOP1Op_S_SWAPPC_B64,
+  SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64,
  SOP1Op.S_SENDMSG_RTN_B32: _SOP1Op_S_SENDMSG_RTN_B32,
  SOP1Op.S_SENDMSG_RTN_B64: _SOP1Op_S_SENDMSG_RTN_B64,
  SOP1Op.S_CEIL_F32: _SOP1Op_S_CEIL_F32,
@@ -1315,7 +1376,7 @@ SOP1Op_FUNCTIONS = {
  SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16,
}

-def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # tmp = 64'U(S0.u32) + 64'U(S1.u32);
  # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
  # D0.u32 = tmp.u32
@@ -1332,7 +1393,7 @@ def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': SCC._val & 1}
  return result

-def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # tmp = S0.u32 - S1.u32;
  # SCC = S1.u32 > S0.u32 ?
1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1349,7 +1410,7 @@ def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 + S1.i32; # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -1366,7 +1427,7 @@ def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 - S1.i32; # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -1383,7 +1444,7 @@ def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1400,7 +1461,7 @@ def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - SCC.u32; # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 
1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1417,7 +1478,7 @@ def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32; # if D0.i32 < 0 then # D0.i32 = -D0.i32 @@ -1436,7 +1497,7 @@ def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1450,7 +1511,7 @@ def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 << S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1465,7 +1526,7 @@ def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 >> S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1479,7 +1540,7 @@ def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 >> S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1494,7 +1555,7 @@ def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -1508,7 +1569,7 @@ def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); # SCC = D0.i64 != 0LL S0 = Reg(s0) @@ -1523,7 +1584,7 @@ def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1540,7 +1601,7 @@ def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1557,7 +1618,7 @@ def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1574,7 +1635,7 @@ def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1591,7 +1652,7 @@ def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 S0 = Reg(s0) @@ -1605,7 +1666,7 @@ def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) @@ -1619,7 +1680,7 @@ def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32; # D0.i32 = SCC ? 
S0.i32 : S1.i32 S0 = Reg(s0) @@ -1633,7 +1694,7 @@ def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) @@ -1647,7 +1708,7 @@ def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1661,7 +1722,7 @@ def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1676,7 +1737,7 @@ def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1690,7 +1751,7 @@ def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1705,7 +1766,7 @@ def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result['d0_64'] = True return result -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1719,7 +1780,7 @@ def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1734,7 +1795,7 @@ def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1748,7 +1809,7 @@ def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1763,7 +1824,7 @@ def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1777,7 +1838,7 @@ def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1792,7 +1853,7 @@ def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1806,7 +1867,7 @@ def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1821,7 +1882,7 @@ def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1835,7 +1896,7 @@ def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1850,7 +1911,7 @@ def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_NOT1_B32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1864,7 +1925,7 @@ def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1879,7 +1940,7 @@ def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1893,7 +1954,7 @@ def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); # SCC = D0.i32 != 0 @@ -1910,7 +1971,7 @@ def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1925,7 +1986,7 @@ def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); # SCC = D0.i64 != 0LL @@ -1943,7 +2004,7 @@ def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -1954,7 +2015,7 @@ def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (((1ULL << S0[5 : 
0].u32) - 1ULL) << S1[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -1966,7 +2027,7 @@ def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 * S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1977,7 +2038,7 @@ def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -1988,7 +2049,7 @@ def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -1999,7 +2060,7 @@ def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2011,7 +2072,7 @@ def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = SCC ? 
S0.u64 : S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2024,7 +2085,7 @@ def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2035,7 +2096,7 @@ def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2046,7 +2107,7 @@ def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2057,7 +2118,7 @@ def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2068,7 +2129,7 @@ def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2079,7 +2140,7 @@ def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2090,7 +2151,7 @@ def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -2150,7 +2211,7 @@ def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -2210,7 +2271,7 @@ def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2221,7 +2282,7 @@ def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2233,7 +2294,7 @@ def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2245,7 +2306,7 @@ def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2256,7 +2317,7 @@ def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -2271,7 +2332,7 @@ def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2282,7 +2343,7 @@ def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2293,7 +2354,7 @@ def _SOP2Op_S_SUB_F16(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -2353,7 +2414,7 @@ def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -2413,7 +2474,7 @@ def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2424,7 +2485,7 @@ def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -2505,7 +2566,7 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_FMAC_F16: _SOP2Op_S_FMAC_F16, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 == S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2516,7 +2577,7 @@ def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2527,7 +2588,7 @@ def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2538,7 +2599,7 @@ def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 
>= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2549,7 +2610,7 @@ def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2560,7 +2621,7 @@ def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2571,7 +2632,7 @@ def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2582,7 +2643,7 @@ def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2593,7 +2654,7 @@ def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2604,7 +2665,7 @@ def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2615,7 +2676,7 @@ def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2626,7 +2687,7 @@ def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, 
pc=0): # SCC = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2637,7 +2698,7 @@ def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -2648,7 +2709,7 @@ def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -2659,7 +2720,7 @@ def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -2670,7 +2731,7 @@ def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -2681,7 +2742,7 @@ def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 == S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2692,7 +2753,7 @@ def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2703,7 +2764,7 @@ def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2714,7 +2775,7 @@ def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_F16(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2725,7 +2786,7 @@ def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 == S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2736,7 +2797,7 @@ def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 == S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2747,7 +2808,7 @@ def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2758,7 +2819,7 @@ def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2769,7 +2830,7 @@ def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2780,7 +2841,7 @@ def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2791,7 +2852,7 @@ def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2802,7 +2863,7 @@ def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2813,7 +2874,7 @@ def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2824,7 +2885,7 @@ def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2835,7 +2896,7 @@ def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -2846,7 +2907,7 @@ def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -2857,7 +2918,7 @@ def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -2868,7 +2929,7 @@ def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -2879,7 +2940,7 @@ def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -2891,7 +2952,7 @@ def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -2903,7 +2964,7 @@ def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -2915,7 +2976,7 @@ def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -2927,7 +2988,7 @@ def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -2939,7 +3000,7 @@ def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -2951,7 +3012,7 @@ def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -2963,7 +3024,7 @@ def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -2975,7 +3036,7 @@ def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def 
_SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -2987,7 +3048,7 @@ def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -2999,7 +3060,7 @@ def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -3011,7 +3072,7 @@ def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -3072,7 +3133,7 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(SIMM16.i16)) D0 = Reg(d0) SIMM16 = Reg(literal) @@ -3082,7 +3143,7 @@ def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Do nothing - for use by tools only # --- compiled pseudocode --- @@ -3090,7 +3151,7 @@ def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': d0, 'scc': scc & 1} return result -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.i32 = 32'I(signext(SIMM16.i16)) # endif @@ -3104,7 +3165,7 @@ def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) 
== signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3115,7 +3176,7 @@ def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) != signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3126,7 +3187,7 @@ def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) > signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3137,7 +3198,7 @@ def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) >= signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3148,7 +3209,7 @@ def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) < signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3159,7 +3220,7 @@ def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) <= signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3170,7 +3231,7 @@ def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == 32'U(SIMM16.u16) S0 = Reg(s0) SCC = Reg(scc) @@ -3181,7 +3242,7 @@ def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 != 32'U(SIMM16.u16) S0 = Reg(s0) SCC = Reg(scc) @@ -3192,7 +3253,7 @@ def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # SCC = S0.u32 > 32'U(SIMM16.u16)
  S0 = Reg(s0)
  SCC = Reg(scc)
@@ -3203,7 +3264,7 @@ def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': d0, 'scc': SCC._val & 1}
  return result

-def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # SCC = S0.u32 >= 32'U(SIMM16.u16)
  S0 = Reg(s0)
  SCC = Reg(scc)
@@ -3214,7 +3275,7 @@ def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': d0, 'scc': SCC._val & 1}
  return result

-def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # SCC = S0.u32 < 32'U(SIMM16.u16)
  S0 = Reg(s0)
  SCC = Reg(scc)
@@ -3225,7 +3286,7 @@ def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': d0, 'scc': SCC._val & 1}
  return result

-def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # SCC = S0.u32 <= 32'U(SIMM16.u16)
  S0 = Reg(s0)
  SCC = Reg(scc)
@@ -3236,7 +3297,7 @@ def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
  result = {'d0': d0, 'scc': SCC._val & 1}
  return result

-def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # tmp = D0.i32;
  # D0.i32 = 32'I(64'I(D0.i32) + signext(SIMM16.i16));
  # SCC = ((tmp[31] == SIMM16.i16[15]) && (tmp[31] != D0.i32[31]));
@@ -3252,7 +3313,7 @@ def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': SCC._val & 1}
  return result

-def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.i32 = 32'I(64'I(D0.i32) * signext(SIMM16.i16))
  D0 = Reg(d0)
  SIMM16 = Reg(literal)
@@ -3262,6 +3323,22 @@ def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

+def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0.i64 = PC + 4LL;
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  D0 = Reg(d0)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  D0.i64 = PC + 4
+  PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
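The S_CALL_B64 arithmetic above is easy to check by hand: the return address is PC + 4, and the target is PC + 4 plus the sign-extended SIMM16 counted in dwords. A minimal standalone sketch of the same math, outside the Reg/pcode helpers (the function names here are illustrative only, not part of the generated file):

def _sext16(v):
  # reinterpret the low 16 bits as a signed two's-complement value
  return v - 0x10000 if v & 0x8000 else v

def s_call_b64(pc, simm16):
  # returns (return address written to D0, branch target), both byte addresses
  return pc + 4, pc + 4 + _sext16(simm16) * 4

assert s_call_b64(0x100, 0x0003) == (0x104, 0x110)  # forward 3 dwords
assert s_call_b64(0x100, 0xfffe) == (0x104, 0x00fc) # back 2 dwords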
SOPKOp_FUNCTIONS = {
  SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32,
  SOPKOp.S_VERSION: _SOPKOp_S_VERSION,
@@ -3280,9 +3357,10 @@ SOPKOp_FUNCTIONS = {
  SOPKOp.S_CMPK_LE_U32: _SOPKOp_S_CMPK_LE_U32,
  SOPKOp.S_ADDK_I32: _SOPKOp_S_ADDK_I32,
  SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32,
+  SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64,
}

-def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # for i in 0U : SIMM16.u16[3 : 0].u32 do
  # endfor
  SIMM16 = Reg(literal)
@@ -3293,7 +3371,7 @@ def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _var
  result = {'d0': d0, 'scc': scc & 1}
  return result

-def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # instruction may be omitted. For wave64 the compiler may not know the status of the EXEC mask and hence
  # // 1 cycle delay here
  # // 2 cycles delay here
@@ -3305,22 +3383,255 @@ def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
  if EXEC._val != exec_mask: result['exec'] = EXEC._val
  return result

-def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # // PC passed into trap handler points to S_TRAP itself,
+  # PC = TBA.i64;
  # // trap base address
+  PC = Reg(pc)
  # --- compiled pseudocode ---
  # --- end pseudocode ---
  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL;
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if SCC == 1'0U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  SCC = Reg(scc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if SCC == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': SCC._val & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if SCC == 1'1U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  SCC = Reg(scc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if SCC == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': SCC._val & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
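Taken or not taken, every branch handler above reports 'new_pc' as an absolute byte address, so the caller never recomputes the fall-through case itself. A rough sketch of the consuming side, assuming a simple state dict and 4-byte instructions (hypothetical names; the real stepping logic lives in emu.py and may differ):

def step_sopp(op, st):
  # st is a hypothetical dict holding pc, scc, vcc, exec and the decoded literal
  res = SOPPOp_FUNCTIONS[op](0, 0, 0, 0, st['scc'], st['vcc'], 0, st['exec'],
                             st['literal'], None, {}, pc=st['pc'])
  st['scc'] = res['scc']
  # non-branch ops return no 'new_pc': fall through past the 4-byte instruction
  st['pc'] = res.get('new_pc', st['pc'] + 4)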
+def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # If VCCZ is 1 then jump to a constant offset relative to the current PC.
+  # if VCCZ.u1 == 1'1U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  VCC = Reg(vcc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if VCCZ.u1 == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # If VCCZ is 0 then jump to a constant offset relative to the current PC.
+  # if VCCZ.u1 == 1'0U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  VCC = Reg(vcc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if VCCZ.u1 == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if EXECZ.u1 == 1'1U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  EXEC = Reg(exec_mask)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  EXECZ = Reg(1 if EXEC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if EXECZ.u1 == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if EXECZ.u1 == 1'0U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  EXEC = Reg(exec_mask)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  EXECZ = Reg(1 if EXEC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if EXECZ.u1 == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
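Note that VCCZ and EXECZ are not carried as architectural state in this model; each handler re-derives them from the full mask (the Reg(1 if ..._val == 0 else 0) lines above). A small plain-Python illustration of that convention and the resulting target selection (names illustrative only):

def cbranch_execz_target(pc, simm16, exec_mask):
  execz = 1 if exec_mask == 0 else 0  # derived from the mask, never stored
  off = (simm16 - 0x10000 if simm16 & 0x8000 else simm16) * 4
  return pc + 4 + off if execz else pc + 4

assert cbranch_execz_target(0x40, 3, exec_mask=0) == 0x50     # no lanes active: taken
assert cbranch_execz_target(0x40, 3, exec_mask=0b101) == 0x44 # fall through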
+def _SOPPOp_S_CBRANCH_CDBGSYS(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if WAVE_STATUS.COND_DBG_SYS.u32 != 0U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if WAVE_STATUS.COND_DBG_SYS.u32 != 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_CDBGUSER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if WAVE_STATUS.COND_DBG_USER.u32 != 0U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if WAVE_STATUS.COND_DBG_USER.u32 != 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if (WAVE_STATUS.COND_DBG_SYS || WAVE_STATUS.COND_DBG_USER) then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER):
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if (WAVE_STATUS.COND_DBG_SYS && WAVE_STATUS.COND_DBG_USER) then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER):
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
  return result

SOPPOp_FUNCTIONS = {
  SOPPOp.S_NOP: _SOPPOp_S_NOP,
  SOPPOp.S_DELAY_ALU: _SOPPOp_S_DELAY_ALU,
  SOPPOp.S_TRAP: _SOPPOp_S_TRAP,
+  SOPPOp.S_BRANCH: _SOPPOp_S_BRANCH,
+  SOPPOp.S_CBRANCH_SCC0: _SOPPOp_S_CBRANCH_SCC0,
+  SOPPOp.S_CBRANCH_SCC1: _SOPPOp_S_CBRANCH_SCC1,
+  SOPPOp.S_CBRANCH_VCCZ: _SOPPOp_S_CBRANCH_VCCZ,
+  SOPPOp.S_CBRANCH_VCCNZ: _SOPPOp_S_CBRANCH_VCCNZ,
+  SOPPOp.S_CBRANCH_EXECZ: _SOPPOp_S_CBRANCH_EXECZ,
+  SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ,
+  SOPPOp.S_CBRANCH_CDBGSYS: _SOPPOp_S_CBRANCH_CDBGSYS,
+  SOPPOp.S_CBRANCH_CDBGUSER: _SOPPOp_S_CBRANCH_CDBGUSER,
+  SOPPOp.S_CBRANCH_CDBGSYS_OR_USER: _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER,
+  SOPPOp.S_CBRANCH_CDBGSYS_AND_USER: _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER,
}

-def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # D0.b32 = S0.b32
  S0 = Reg(s0)
  D0 = Reg(d0)
@@ -3330,7 +3641,7 @@ def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
  result = {'d0': D0._val, 'scc': scc & 1}
  return result

-def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -3373,7 +3684,7 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3383,7 +3694,7 @@ def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -3394,7 +3705,7 @@ def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -3404,7 +3715,7 @@ def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3414,7 +3725,7 @@ def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3424,7 +3735,7 @@ def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3434,7 +3745,7 @@ def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3444,7 +3755,7 @@ def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -3454,7 +3765,7 @@ def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -3464,7 +3775,7 @@ def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -3474,7 +3785,7 @@ def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3484,7 +3795,7 @@ def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3495,7 +3806,7 @@ def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3505,7 +3816,7 @@ def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3515,7 +3826,7 @@ def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3525,7 +3836,7 @@ def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 
'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3535,7 +3846,7 @@ def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3545,7 +3856,7 @@ def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3556,7 +3867,7 @@ def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3567,7 +3878,7 @@ def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -3583,7 +3894,7 @@ def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -3599,7 +3910,7 @@ def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -3615,7 +3926,7 @@ def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b16 = S0.b16 S0 = Reg(s0) D0 = Reg(d0) @@ -3625,7 +3936,7 @@ def 
_VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3635,7 +3946,7 @@ def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3645,7 +3956,7 @@ def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -3660,7 +3971,7 @@ def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -3675,7 +3986,7 @@ def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -3690,7 +4001,7 @@ def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3700,7 +4011,7 @@ def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3710,7 +4021,7 @@ def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -3720,7 +4031,7 @@ def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -3731,7 +4042,7 @@ def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3741,7 +4052,7 @@ def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -3752,7 +4063,7 @@ def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3763,7 +4074,7 @@ def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3773,7 +4084,7 @@ def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3784,7 +4095,7 @@ def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3794,7 +4105,7 @@ def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3804,7 +4115,7 @@ def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -3814,7 +4125,7 @@ def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -3824,7 +4135,7 @@ def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -3844,7 +4155,7 @@ def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -3864,7 +4175,7 @@ def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -3884,7 +4195,7 @@ def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -3901,7 +4212,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -3919,7 +4230,7 @@ def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def 
_VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3930,7 +4241,7 @@ def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -3947,7 +4258,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -3964,7 +4275,7 @@ def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 @@ -3978,7 +4289,7 @@ def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -3988,7 +4299,7 @@ def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -3998,7 +4309,7 @@ def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4008,7 +4319,7 @@ def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = 
f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4018,7 +4329,7 @@ def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -4028,7 +4339,7 @@ def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4038,7 +4349,7 @@ def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4048,7 +4359,7 @@ def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4058,7 +4369,7 @@ def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4068,7 +4379,7 @@ def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -4085,7 +4396,7 @@ def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -4102,7 +4413,7 @@ def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -4117,7 +4428,7 @@ def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -4132,7 +4443,7 @@ def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4142,7 +4453,7 @@ def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -4157,7 +4468,7 @@ def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + -floor(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4167,7 +4478,7 @@ def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -4177,7 +4488,7 @@ def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -4187,7 +4498,17 @@ def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0.b16 = { SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16) } + S0 = Reg(s0) + D0 = Reg(d0) + # --- 
compiled pseudocode --- + D0.b16 = _pack(SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_snorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4197,7 +4518,7 @@ def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_unorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4207,7 +4528,7 @@ def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.b32; # D0.b32 = S0.b32; # S0.b32 = tmp @@ -4222,7 +4543,7 @@ def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.b16; # D0.b16 = S0.b16; # S0.b16 = tmp @@ -4237,7 +4558,7 @@ def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = ~S0.u16 S0 = Reg(s0) D0 = Reg(d0) @@ -4247,7 +4568,7 @@ def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -4257,7 +4578,7 @@ def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { 16'0, S0.u16 } S0 = Reg(s0) D0 = Reg(d0) @@ -4338,6 +4659,7 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_FRACT_F16: _VOP1Op_V_FRACT_F16, VOP1Op.V_SIN_F16: _VOP1Op_V_SIN_F16, VOP1Op.V_COS_F16: _VOP1Op_V_COS_F16, + VOP1Op.V_SAT_PK_U8_I16: _VOP1Op_V_SAT_PK_U8_I16, VOP1Op.V_CVT_NORM_I16_F16: _VOP1Op_V_CVT_NORM_I16_F16, VOP1Op.V_CVT_NORM_U16_F16: _VOP1Op_V_CVT_NORM_U16_F16, VOP1Op.V_SWAP_B32: _VOP1Op_V_SWAP_B32, @@ -4347,7 +4669,7 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_U32_U16: _VOP1Op_V_CVT_U32_U16, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4361,7 +4683,7 @@ def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_DOT2ACC_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT2ACC_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -4379,7 +4701,7 @@ def _VOP2Op_V_DOT2ACC_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4390,7 +4712,7 @@ def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4401,7 +4723,7 @@ def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4412,7 +4734,7 @@ def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -4432,7 +4754,7 @@ def _VOP2Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -4451,7 +4773,7 @@ def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
_vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4462,7 +4784,7 @@ def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -4473,7 +4795,7 @@ def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -4484,7 +4806,7 @@ def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -4495,7 +4817,7 @@ def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -4506,7 +4828,7 @@ def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -4566,7 +4888,7 @@ def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -4626,7 +4948,7 @@ def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? 
S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -4637,7 +4959,7 @@ def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -4648,7 +4970,7 @@ def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4659,7 +4981,7 @@ def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4670,7 +4992,7 @@ def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4681,7 +5003,7 @@ def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4692,7 +5014,7 @@ def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4703,7 +5025,7 @@ def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4714,7 +5036,7 @@ def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_OR_B32(s0, s1, s2, 
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4725,7 +5047,7 @@ def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4736,7 +5058,7 @@ def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4747,7 +5069,7 @@ def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -4767,7 +5089,7 @@ def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -4787,7 +5109,7 @@ def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
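
Aside: the carry-in/carry-out arithmetic the V_ADD_CO_CI_U32 / V_SUB(REV)_CO_CI_U32 handlers above compile down to can be sketched standalone. This is a minimal illustration in plain Python under the pseudocode's stated semantics, not code from the emulator; the function name is made up.

    def add_co_ci_u32(s0: int, s1: int, carry_in: int) -> tuple[int, int]:
        # 64-bit intermediate sum, as in the pseudocode: S0.u32 + S1.u32 + VCC[lane]
        tmp = (s0 & 0xffffffff) + (s1 & 0xffffffff) + (carry_in & 1)
        carry_out = 1 if tmp >= 0x100000000 else 0  # unsigned overflow -> VCC[lane]
        return tmp & 0xffffffff, carry_out

    assert add_co_ci_u32(0xffffffff, 1, 0) == (0, 1)  # wraps to zero, sets carry
    assert add_co_ci_u32(2, 3, 1) == (6, 0)           # carry-in is added, no carry-out
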
@@ -4807,7 +5129,7 @@ def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4818,7 +5140,7 @@ def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4829,7 +5151,7 @@ def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4840,7 +5162,7 @@ def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -4851,7 +5173,7 @@ def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -4863,7 +5185,7 @@ def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -4875,7 +5197,7 @@ def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -4890,7 +5212,7 @@ def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -4901,7 +5223,7 @@ def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -4912,7 +5234,7 @@ def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -4923,7 +5245,7 @@ def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -4934,7 +5256,7 @@ def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -4945,7 +5267,7 @@ def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -4957,7 +5279,7 @@ def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -4969,7 +5291,7 @@ def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -5029,7 +5351,7 @@ def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result 
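
Aside: the FMAMK/FMAAK handlers above differ only in where the 32-bit inline literal SIMM32 lands. A rough sketch of the operand placement, using ordinary non-fused Python floats purely for illustration (real hardware performs a single fused multiply-add in f16/f32; names are made up):

    def fmamk(s0: float, simm32: float, s1: float) -> float:
        # V_FMAMK_*: D0 = fma(S0, SIMM32, S1) -- the literal is the multiplier
        return s0 * simm32 + s1

    def fmaak(s0: float, s1: float, simm32: float) -> float:
        # V_FMAAK_*: D0 = fma(S0, S1, SIMM32) -- the literal is the addend
        return s0 * s1 + simm32
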
-def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -5089,7 +5411,7 @@ def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -5100,7 +5422,7 @@ def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16); # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) S0 = Reg(s0) @@ -5162,22 +5484,25 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, } -def _VOP3Op_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. @@ -5186,15 +5511,18 @@ def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. @@ -5203,15 +5531,18 @@ def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5219,15 +5550,18 @@ def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. @@ -5236,15 +5570,18 @@ def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5252,15 +5589,18 @@ def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5268,15 +5608,18 @@ def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -5285,15 +5628,18 @@ def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
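
Aside: each of these VOP3 compare handlers now folds the 64-bit PC register value back into a signed Python int before reporting new_pc. The conversion is a plain two's-complement reinterpretation; shown standalone below with an illustrative name, mirroring the expression used in the generated code:

    def to_signed_i64(u: int) -> int:
        # equivalent to: PC._val if PC._val < 0x8000000000000000
        #                else PC._val - 0x10000000000000000
        return u if u < (1 << 63) else u - (1 << 64)

    assert to_signed_i64(0x100) == 0x100
    assert to_signed_i64(0xfffffffffffffffc) == -4  # high bit set -> negative value
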
@@ -5302,15 +5648,18 @@ def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5319,15 +5668,18 @@ def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5336,15 +5688,18 @@ def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -5354,15 +5709,18 @@ def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5371,15 +5729,18 @@ def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -5389,15 +5750,18 @@ def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -5407,45 +5771,54 @@ def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -5454,15 +5827,18 @@ def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -5471,15 +5847,18 @@ def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5487,15 +5866,18 @@ def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -5504,15 +5886,18 @@ def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. 
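[editor's note] In the ISA pseudocode just above, <> is the ordered "less than or greater than" test; the compiled Python renders it as !=. For the integer variants further down (V_CMP_NE_I16, V_CMP_NE_U32, ...) the two are identical, but for floats they diverge exactly on NaN: a <> b is false when either operand is NaN, while Python's != is true. Whether the .f32 accessor or the surrounding emulator compensates is not visible in this hunk; the sketch below only illustrates the semantic gap, with helper names of our own:

def lg(a: float, b: float) -> bool:
    """Ordered 'less than or greater than' (the ISA's <>): false on NaN inputs."""
    return a < b or a > b

def neq(a: float, b: float) -> bool:
    """Negated equality (the ISA's !(a == b)): true on NaN inputs."""
    return not (a == b)

nan = float("nan")
assert lg(1.0, 2.0) and neq(1.0, 2.0)       # ordinary operands agree
assert not lg(nan, 1.0) and neq(nan, 1.0)   # they differ only when NaN is involved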
S0 = Reg(s0) @@ -5520,15 +5905,18 @@ def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5536,15 +5924,18 @@ def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -5553,15 +5944,18 @@ def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. 
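[editor's note] V_CMP_O and V_CMP_U reduce to pure NaN tests: "ordered" means neither operand is NaN, "unordered" means at least one is. The 64'F(...) casts in the pseudocode just widen f32 to f64 before the check, which does not change NaN-ness. A minimal sketch of the two predicates, using math.isnan in place of the pcode isNAN helper:

import math

def cmp_o(a: float, b: float) -> bool:
    # V_CMP_O: true iff the operands are orderable (neither is NaN)
    return not math.isnan(a) and not math.isnan(b)

def cmp_u(a: float, b: float) -> bool:
    # V_CMP_U: true iff the operands are unorderable (at least one NaN)
    return math.isnan(a) or math.isnan(b)

assert cmp_o(1.0, 2.0) and not cmp_u(1.0, 2.0)
assert cmp_u(float("nan"), 2.0) and not cmp_o(float("nan"), 2.0)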
@@ -5570,15 +5964,18 @@ def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5587,15 +5984,18 @@ def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5604,15 +6004,18 @@ def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -5622,15 +6025,18 @@ def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5639,15 +6045,18 @@ def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -5657,15 +6066,18 @@ def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -5675,45 +6087,54 @@ def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -5722,15 +6143,18 @@ def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
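[editor's note] The "N"-prefixed compares above (NGE, NGT, NLE, NLT) are genuine negations, not mirrored comparisons, and the "With NAN inputs this is not the same operation as ..." comments are the whole point: every ordered comparison is false when an operand is NaN, so its negation is true. A quick demonstration:

nan = float("nan")

# With a NaN operand, every ordered comparison is false ...
assert not (nan < 1.0) and not (nan >= 1.0)

# ... so V_CMP_NGE (not >=) and V_CMP_LT (<) disagree exactly there:
v_cmp_nge = not (nan >= 1.0)   # True
v_cmp_lt = nan < 1.0           # False
assert v_cmp_nge != v_cmp_lt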
Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -5739,15 +6163,18 @@ def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5755,15 +6182,18 @@ def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -5772,15 +6202,18 @@ def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5788,15 +6221,18 @@ def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5804,15 +6240,18 @@ def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -5821,15 +6260,18 @@ def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
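[editor's note] Every compiled handler reports its effects through the same small dict rather than mutating state directly: 'd0' is the compare mask being built, 'scc' echoes the (unchanged) scalar condition code, 'd0_64' flags a 64-bit destination write, 'vcc_lane' appears only when the pseudocode actually touched VCC, and 'new_pc' is the absolute byte address after the sign decode. How emu.py folds these back into register state is not shown in this hunk; the loop below is a hypothetical consumer just to make the contract concrete:

def apply_result(state: dict, lane: int, result: dict) -> None:
    """Hypothetical write-back, assuming a flat dict of register state."""
    state["d0"] = result["d0"]            # result.get("d0_64") would pick the write width
    state["scc"] = result["scc"]
    if "vcc_lane" in result:              # only present if the op changed VCC
        bit = 1 << lane
        state["vcc"] = (state["vcc"] & ~bit) | (result["vcc_lane"] << lane)
    if "new_pc" in result:                # absolute byte address
        state["pc"] = result["new_pc"]

state = {"d0": 0, "scc": 0, "vcc": 0, "pc": 0}
apply_result(state, lane=3, result={"d0": 8, "scc": 0, "d0_64": True, "new_pc": 16})
assert state["d0"] == 8 and state["pc"] == 16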
@@ -5838,15 +6280,18 @@ def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5855,15 +6300,18 @@ def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5872,15 +6320,18 @@ def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -5890,15 +6341,18 @@ def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5907,15 +6361,18 @@ def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -5925,15 +6382,18 @@ def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -5943,30 +6403,36 @@ def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -5975,15 +6441,18 @@ def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
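[editor's note] The recurring "if VCC._val != vcc:" guard is change detection: the handler receives VCC by value, and only when the compiled pseudocode wrote to it does the result carry a 'vcc_lane' bit. For these VOP3 compares the destination is D0 (an arbitrary SGPR pair), so the guard normally stays false; presumably the same template also serves ops whose pseudocode targets VCC directly. A stripped-down sketch of the pattern:

def handler(vcc_in: int, lane: int, writes_vcc: bool) -> dict:
    vcc = vcc_in
    if writes_vcc:
        vcc |= 1 << lane          # the pseudocode may or may not touch VCC
    result = {}
    if vcc != vcc_in:             # report the lane bit only on a real change
        result["vcc_lane"] = (vcc >> lane) & 1
    return result

assert handler(0, 5, writes_vcc=False) == {}
assert handler(0, 5, writes_vcc=True) == {"vcc_lane": 1}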
@@ -5992,15 +6461,18 @@ def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6008,15 +6480,18 @@ def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -6025,15 +6500,18 @@ def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -6042,15 +6520,18 @@ def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6058,15 +6539,18 @@ def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -6075,15 +6559,18 @@ def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
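[editor's note] The I16/U16 pairs run the same comparison over the same 16 bits; only the interpretation differs. The bit pattern 0xFFFF is -1 as i16 but 65535 as u16, so V_CMP_LT_I16 and V_CMP_LT_U16 can disagree on identical register contents. A worked example of the two decodings (helper names are ours):

def as_i16(raw: int) -> int:
    raw &= 0xFFFF
    return raw - 0x10000 if raw & 0x8000 else raw   # sign-extend bit 15

def as_u16(raw: int) -> int:
    return raw & 0xFFFF

a, b = 0xFFFF, 0x0001
assert as_i16(a) < as_i16(b)         # -1 < 1     -> V_CMP_LT_I16 sets the lane bit
assert not (as_u16(a) < as_u16(b))   # 65535 < 1  -> V_CMP_LT_U16 leaves it clear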
@@ -6092,15 +6579,18 @@ def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6108,15 +6598,18 @@ def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -6125,15 +6618,18 @@ def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -6142,15 +6638,18 @@ def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6158,30 +6657,36 @@ def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -6190,15 +6695,18 @@ def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
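[editor's note] D0.u64[laneId] = <cond> is a single-bit write into a 64-bit wave mask: each lane's boolean lands at its own bit position, and the full mask is what 'd0' carries back (with 'd0_64' flagging the 64-bit write). The Reg class does this through its bit-slice setter; a plain-int equivalent:

def set_lane_bit(mask: int, lane: int, cond: bool) -> int:
    """Equivalent of D0.u64[laneId] = cond on a plain 64-bit integer."""
    bit = 1 << lane
    return (mask | bit) if cond else (mask & ~bit)

mask = 0
mask = set_lane_bit(mask, 0, True)    # lane 0 passed the compare
mask = set_lane_bit(mask, 5, True)    # lane 5 passed
mask = set_lane_bit(mask, 0, False)   # lane 0 cleared again
assert mask == 0b100000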
@@ -6207,15 +6715,18 @@ def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6223,15 +6734,18 @@ def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -6240,15 +6754,18 @@ def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -6257,15 +6774,18 @@ def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6273,45 +6793,54 @@ def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -6320,15 +6849,18 @@ def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -6337,15 +6869,18 @@ def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6353,15 +6888,18 @@ def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -6370,15 +6908,18 @@ def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -6387,15 +6928,18 @@ def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6403,45 +6947,54 @@ def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
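[editor's note] The handlers are strictly per-lane: the emulator presumably invokes one call per active lane and accumulates the returned bits under the exec mask. The loop below is a hypothetical illustration of that accumulation, not the actual emu.py driver:

def run_wave_cmp(per_lane_cmp, s0_vals, s1_vals, exec_mask: int) -> int:
    """Hypothetical wave-level driver: OR each active lane's bit into d0."""
    d0 = 0
    for lane in range(64):
        if not (exec_mask >> lane) & 1:
            continue                          # inactive lanes leave d0 untouched
        if per_lane_cmp(s0_vals[lane], s1_vals[lane]):
            d0 |= 1 << lane
    return d0

s0 = list(range(64)); s1 = [32] * 64
mask = run_wave_cmp(lambda a, b: a < b, s0, s1, exec_mask=(1 << 64) - 1)
assert mask == (1 << 32) - 1                  # lanes 0..31 satisfy a < b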
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -6450,15 +7003,18 @@ def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -6467,15 +7023,18 @@ def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6483,15 +7042,18 @@ def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -6500,15 +7062,18 @@ def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -6517,15 +7082,18 @@ def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6533,45 +7101,54 @@ def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. 
# D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. @@ -6580,15 +7157,18 @@ def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -6597,15 +7177,18 @@ def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6613,15 +7196,18 @@ def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. @@ -6630,15 +7216,18 @@ def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -6647,15 +7236,18 @@ def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6663,30 +7255,36 @@ def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. 
Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -6723,6 +7321,7 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -6741,9 +7340,11 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -6780,6 +7381,7 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -6798,9 +7400,11 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
@@ -6837,6 +7441,7 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -6855,9 +7460,11 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -6868,7 +7475,7 @@ def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6881,7 +7488,7 @@ def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 S0 = Reg(s0) @@ -6895,7 +7502,7 @@ def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6908,7 +7515,7 @@ def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6921,7 +7528,7 @@ def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6934,7 +7541,7 @@ def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6947,7 +7554,7 @@ def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -6960,7 +7567,7 @@ def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -6973,7 +7580,7 @@ def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -6987,7 +7594,7 @@ def 
_VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -7001,7 +7608,7 @@ def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -7015,7 +7622,7 @@ def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -7029,7 +7636,7 @@ def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -7043,7 +7650,7 @@ def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7057,7 +7664,7 @@ def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7068,7 +7675,7 @@ def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7079,7 
+7686,7 @@ def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7092,7 +7699,7 @@ def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 S0 = Reg(s0) @@ -7106,7 +7713,7 @@ def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7119,7 +7726,7 @@ def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7132,7 +7739,7 @@ def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7145,7 +7752,7 @@ def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7158,7 +7765,7 @@ def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -7171,7 +7778,7 @@ def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if 
EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -7184,7 +7791,7 @@ def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -7198,7 +7805,7 @@ def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -7212,7 +7819,7 @@ def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -7226,7 +7833,7 @@ def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -7240,7 +7847,7 @@ def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -7254,7 +7861,7 @@ def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ 
-7268,7 +7875,7 @@ def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7279,7 +7886,7 @@ def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7290,7 +7897,7 @@ def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 < S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7303,7 +7910,7 @@ def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 S0 = Reg(s0) @@ -7317,7 +7924,7 @@ def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7330,7 +7937,7 @@ def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 > S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7343,7 +7950,7 @@ def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <> S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7356,7 +7963,7 @@ def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 >= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7369,7 +7976,7 @@ def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -7382,7 +7989,7 @@ def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -7395,7 +8002,7 @@ def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -7409,7 +8016,7 @@ def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -7423,7 +8030,7 @@ def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -7437,7 +8044,7 @@ def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -7451,7 +8058,7 @@ def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -7465,7 +8072,7 @@ def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7479,7 +8086,7 @@ def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7490,7 +8097,7 @@ def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 < S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7503,7 +8110,7 @@ def 
_VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 S0 = Reg(s0) @@ -7517,7 +8124,7 @@ def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7530,7 +8137,7 @@ def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 > S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7543,7 +8150,7 @@ def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <> S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7556,7 +8163,7 @@ def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 >= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7569,7 +8176,7 @@ def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 < S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7582,7 +8189,7 @@ def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 S0 = Reg(s0) @@ -7596,7 +8203,7 @@ def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7609,7 +8216,7 @@ def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 > S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7622,7 +8229,7 @@ def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <> S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7635,7 +8242,7 @@ def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 >= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7648,7 +8255,7 @@ def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7659,7 +8266,7 @@ def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7672,7 +8279,7 @@ def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 S0 = Reg(s0) @@ -7686,7 +8293,7 @@ def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7699,7 +8306,7 @@ def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7712,7 +8319,7 @@ def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7725,7 +8332,7 @@ def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7738,7 +8345,7 @@ def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7749,7 +8356,7 @@ def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7760,7 +8367,7 @@ def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7773,7 +8380,7 @@ def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val 
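# An aside on the V_CMPX pattern repeated above and below, as a self-contained
# sketch assuming a wave64 EXEC mask held in a Python int (names here are
# standalone, not the emulator's).
def cmpx_lane(exec_mask: int, lane: int, cond: bool) -> int:
    # EXEC.u64[laneId] = cond -- set or clear a single bit of the mask.
    return exec_mask | (1 << lane) if cond else exec_mask & ~(1 << lane)

mask = cmpx_lane(0b1111, 1, False)   # a false compare on lane 1 disables it
assert mask == 0b1101
# As in the handlers, `result['exec']` is reported only when the mask changed.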
return result -def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 S0 = Reg(s0) @@ -7787,7 +8394,7 @@ def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7800,7 +8407,7 @@ def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7813,7 +8420,7 @@ def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7826,7 +8433,7 @@ def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7839,7 +8446,7 @@ def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7850,7 +8457,7 @@ def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7861,7 +8468,7 @@ def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): 
+def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 < S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7874,7 +8481,7 @@ def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 S0 = Reg(s0) @@ -7888,7 +8495,7 @@ def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7901,7 +8508,7 @@ def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 > S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7914,7 +8521,7 @@ def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <> S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7927,7 +8534,7 @@ def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 >= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7940,7 +8547,7 @@ def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7951,7 +8558,7 @@ def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # 
EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7962,7 +8569,7 @@ def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 < S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7975,7 +8582,7 @@ def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 S0 = Reg(s0) @@ -7989,7 +8596,7 @@ def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -8002,7 +8609,7 @@ def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 > S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -8015,7 +8622,7 @@ def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -8028,7 +8635,7 @@ def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 >= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -8041,7 +8648,7 @@ def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -8052,7 +8659,7 @@ def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -8105,7 +8712,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -8158,7 +8765,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -8211,7 +8818,7 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -8221,7 +8828,7 @@ def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -8264,7 +8871,7 @@ def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8274,7 +8881,7 @@ def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -8285,7 +8892,7 @@ def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -8295,7 +8902,7 @@ def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8305,7 +8912,7 @@ def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8315,7 +8922,7 @@ def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8325,7 +8932,7 @@ def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8335,7 +8942,7 @@ def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8345,7 +8952,7 @@ def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -8355,7 +8962,7 @@ def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -8365,7 +8972,7 @@ def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result 
= {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8375,7 +8982,7 @@ def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8386,7 +8993,7 @@ def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8396,7 +9003,7 @@ def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8406,7 +9013,7 @@ def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8416,7 +9023,7 @@ def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8426,7 +9033,7 @@ def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8436,7 +9043,7 @@ def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = 
Reg(d0) @@ -8447,7 +9054,7 @@ def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8458,7 +9065,7 @@ def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -8474,7 +9081,7 @@ def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -8490,7 +9097,7 @@ def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -8506,7 +9113,7 @@ def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b16 = S0.b16 S0 = Reg(s0) D0 = Reg(d0) @@ -8516,7 +9123,7 @@ def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8526,7 +9133,7 @@ def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8536,7 +9143,7 @@ def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, 
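# --- [illustration, not part of the patch: V_RNDNE round-half-to-even] ---
# The V_RNDNE_* pseudocode above is round-to-nearest-even: take floor(x + 0.5),
# then back off by one when x lies exactly on a .5 boundary whose floor is even.
# A sketch checking that recipe against Python's banker's rounding for a few plain
# finite values (this says nothing about NaN/inf/denormal behavior):
import math
def _rndne(x: float) -> float:
    d = math.floor(x + 0.5)
    if math.floor(x) % 2 == 0 and x - math.floor(x) == 0.5:
        d -= 1
    return float(d)
for v in (-2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.7):
    assert _rndne(v) == round(v)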
pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -8551,7 +9158,7 @@ def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -8566,7 +9173,7 @@ def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -8581,7 +9188,7 @@ def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8591,7 +9198,7 @@ def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8601,7 +9208,7 @@ def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -8611,7 +9218,7 @@ def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -8622,7 +9229,7 @@ def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8632,7 +9239,7 @@ def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -8643,7 +9250,7 @@ def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8654,7 +9261,7 @@ def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8664,7 +9271,7 @@ def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8675,7 +9282,7 @@ def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8685,7 +9292,7 @@ def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8695,7 +9302,7 @@ def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -8705,7 +9312,7 @@ def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -8715,7 +9322,7 @@ def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -8735,7 +9342,7 @@ def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -8755,7 +9362,7 @@ def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -8775,7 +9382,7 @@ def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -8792,7 +9399,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -8810,7 +9417,7 @@ def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8821,7 +9428,7 @@ def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -8838,7 +9445,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || 
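# --- [illustration, not part of the patch: the FREXP mant/exp convention] ---
# V_FREXP_MANT / V_FREXP_EXP above split a float so that x == mant * 2**exp with
# the mantissa in [0.5, 1.0), returning 0 / the input unchanged for inf and NaN.
# Python's math.frexp follows the same convention:
import math
mant, exp = math.frexp(6.0)
assert (mant, exp) == (0.75, 3) and mant * 2 ** exp == 6.0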
isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -8855,7 +9462,7 @@ def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 @@ -8869,7 +9476,7 @@ def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -8879,7 +9486,7 @@ def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -8889,7 +9496,7 @@ def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8899,7 +9506,7 @@ def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8909,7 +9516,7 @@ def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -8919,7 +9526,7 @@ def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8929,7 +9536,7 @@ def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8939,7 +9546,7 @@ def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8949,7 +9556,7 @@ def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8959,7 +9566,7 @@ def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -8976,7 +9583,7 @@ def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -8993,7 +9600,7 @@ def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -9008,7 +9615,7 @@ def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -9023,7 +9630,7 @@ def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -9033,7 +9640,7 @@ def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -9048,7 +9655,7 @@ def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + -floor(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -9058,7 +9665,7 @@ def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -9068,7 +9675,7 @@ def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -9078,7 +9685,17 @@ def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0.b16 = { SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16) } + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b16 = _pack(SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_snorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -9088,7 +9705,7 @@ def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_unorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -9098,7 +9715,7 @@ def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = ~S0.u16 S0 = 
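# --- [illustration, not part of the patch: the SAT8-and-pack step of the newly
# added V_SAT_PK_U8_I16] --- The handler clamps two signed 16-bit halves of S0 to
# [0, 255] and packs them into adjacent bytes. A minimal sketch of that behavior
# (_sat8/_sat_pk_u8_i16 are illustrative names, not the generated helpers):
def _sat8(v: int) -> int:
    return min(max(v, 0), 255)
def _sat_pk_u8_i16(s0: int) -> int:
    as_i16 = lambda u: u - 0x10000 if u & 0x8000 else u  # reinterpret bits as signed
    lo, hi = s0 & 0xffff, (s0 >> 16) & 0xffff
    return (_sat8(as_i16(hi)) << 8) | _sat8(as_i16(lo))
assert _sat_pk_u8_i16(0xFFFF0123) == 0x00FF  # -1 clamps to 0, 0x123 clamps to 0xFF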
Reg(s0) D0 = Reg(d0) @@ -9108,7 +9725,7 @@ def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -9118,7 +9735,7 @@ def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { 16'0, S0.u16 } S0 = Reg(s0) D0 = Reg(d0) @@ -9128,7 +9745,7 @@ def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9142,7 +9759,7 @@ def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9153,7 +9770,7 @@ def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9164,7 +9781,7 @@ def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9175,7 +9792,7 @@ def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -9195,7 +9812,7 @@ def _VOP3Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -9214,7 +9831,7 @@ def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9225,7 +9842,7 @@ def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -9236,7 +9853,7 @@ def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -9247,7 +9864,7 @@ def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -9258,7 +9875,7 @@ def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -9269,7 +9886,7 @@ def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -9329,7 +9946,7 @@ def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE 
then # if isSignalNAN(64'F(S0.f32)) then @@ -9389,7 +10006,7 @@ def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9400,7 +10017,7 @@ def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9411,7 +10028,7 @@ def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9422,7 +10039,7 @@ def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9433,7 +10050,7 @@ def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9444,7 +10061,7 @@ def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9455,7 +10072,7 @@ def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9466,7 +10083,7 @@ def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9477,7 +10094,7 @@ def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9488,7 +10105,7 @@ def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9499,7 +10116,7 @@ def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9510,7 +10127,7 @@ def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
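# --- [illustration, not part of the patch: operand order in the *REV shifts] ---
# In V_LSHLREV/V_LSHRREV/V_ASHRREV above the operands are reversed relative to the
# name: S1 is the value and only S0[4:0] is used as the shift amount. A sketch of
# that masking (illustrative helper, not the generated code):
def _lshlrev_b32(s0: int, s1: int) -> int:
    return (s1 << (s0 & 0x1f)) & 0xffffffff  # S1 << S0[4:0]
assert _lshlrev_b32(4, 1) == 0x10
assert _lshlrev_b32(36, 1) == 0x10  # 36 & 31 == 4; high shift bits are ignored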
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9521,7 +10138,7 @@ def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9532,7 +10149,7 @@ def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9543,7 +10160,7 @@ def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -9554,7 +10171,7 @@ def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -9569,7 +10186,7 @@ def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9580,7 +10197,7 @@ def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9591,7 +10208,7 @@ def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9602,7 +10219,7 @@ def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9613,7 +10230,7 @@ def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -9624,7 +10241,7 @@ def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -9684,7 +10301,7 @@ def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -9744,7 +10361,7 @@ def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -9755,7 +10372,7 @@ def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -9775,7 +10392,7 @@ def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9787,7 +10404,7 @@ def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # 
D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9799,7 +10416,7 @@ def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9848,7 +10465,7 @@ def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap S coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9890,7 +10507,7 @@ def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap T coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9925,7 +10542,7 @@ def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = 2.0 * cubemap major axis. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
# // S0.f = x @@ -9953,7 +10570,7 @@ def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) S0 = Reg(s0) S1 = Reg(s1) @@ -9965,7 +10582,7 @@ def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) S0 = Reg(s0) @@ -9980,7 +10597,7 @@ def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) S0 = Reg(s0) S1 = Reg(s1) @@ -9992,7 +10609,7 @@ def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10004,7 +10621,7 @@ def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, S2.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -10017,7 +10634,7 @@ def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); @@ -10038,7 +10655,7 @@ def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0].u32) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -10050,7 +10667,7 @@ def 
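# --- [illustration, not part of the patch: V_BFE / V_BFI bitfield semantics] ---
# V_BFE_U32 extracts S2[4:0] bits of S0 starting at bit S1[4:0]; V_BFI_B32 merges
# S1 and S2 under the mask S0. A minimal sketch (illustrative helpers):
def _bfe_u32(val: int, offset: int, width: int) -> int:
    return (val >> (offset & 0x1f)) & ((1 << (width & 0x1f)) - 1)
def _bfi_b32(mask: int, a: int, b: int) -> int:
    return ((mask & a) | (~mask & b)) & 0xffffffff
assert _bfe_u32(0xDEADBEEF, 8, 8) == 0xBE
assert _bfi_b32(0x0000FFFF, 0x12345678, 0x9ABCDEF0) == 0x9ABC5678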
_VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0].u32 * 8U)) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -10062,7 +10679,7 @@ def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || # isNAN(64'F(S2.f32))) then # D0.f32 = -MAX_FLOAT_F32 @@ -10082,7 +10699,7 @@ def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10094,7 +10711,7 @@ def _VOP3Op_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10106,7 +10723,7 @@ def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10118,7 +10735,7 @@ def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10130,7 +10747,7 @@ def _VOP3Op_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10142,7 +10759,7 @@ def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 
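V_ALIGNBIT_B32 and V_ALIGNBYTE_B32 above are both funnel shifts: the sources are concatenated as { S0, S1 } with S0 in the high dword, the 64-bit value is shifted right (by bits or by bytes), and the low 32 bits are kept. A minimal illustrative sketch:

def alignbit_b32(s0: int, s1: int, s2: int) -> int:
    return (((s0 << 32) | s1) >> (s2 & 31)) & 0xffffffff

def alignbyte_b32(s0: int, s1: int, s2: int) -> int:
    return (((s0 << 32) | s1) >> ((s2 & 3) * 8)) & 0xffffffff

assert alignbit_b32(0x11111111, 0x22222222, 4) == 0x12222222
assert alignbyte_b32(0xaabbccdd, 0x11223344, 1) == 0xdd112233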
'scc': scc & 1} return result -def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10154,7 +10771,7 @@ def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then @@ -10181,7 +10798,7 @@ def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then # D0.i32 = v_max_i32(S1.i32, S2.i32) # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then @@ -10204,7 +10821,7 @@ def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then # D0.u32 = v_max_u32(S1.u32, S2.u32) # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then @@ -10227,7 +10844,7 @@ def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -10251,7 +10868,7 @@ def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10263,7 +10880,7 @@ def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); @@ -10283,7 +10900,7 @@ def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
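The elif ladder in the V_MED3_* ops above computes the median of three values: drop the maximum, then take the max of the remaining two (the float variant additionally falls back to min3 when any input is NaN). A minimal illustrative sketch of the integer case:

def med3_i32(s0: int, s1: int, s2: int) -> int:
    m = max(s0, s1, s2)
    if m == s0:
        return max(s1, s2)
    if m == s1:
        return max(s0, s2)
    return max(s0, s1)

assert med3_i32(5, -3, 9) == sorted([5, -3, 9])[1]
assert med3_i32(7, 7, 1) == 7   # ties follow the same ladder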
VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 S0 = Reg(s0) @@ -10296,7 +10913,7 @@ def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); # D0.u32 = tmp @@ -10313,7 +10930,7 @@ def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f32) ^ sign(S2.f32)); # if isNAN(64'F(S2.f32)) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) @@ -10366,7 +10983,7 @@ def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f64) ^ sign(S2.f64)); # if isNAN(S2.f64) then # D0.f64 = cvtToQuietNAN(S2.f64) @@ -10420,7 +11037,7 @@ def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) # else @@ -10442,7 +11059,7 @@ def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) # else @@ -10465,7 +11082,7 @@ def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += S1.u32[7 : 0] == 8'0U ? 
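The SAD family above accumulates byte-wise absolute differences into S2; V_MSAD_U8 is the masked variant that skips lanes where the reference byte in S1 is zero. A minimal illustrative sketch of the unmasked V_SAD_U8:

def sad_u8(s0: int, s1: int, s2: int) -> int:
    tmp = s2
    for i in range(0, 32, 8):
        a = (s0 >> i) & 0xff
        b = (s1 >> i) & 0xff
        tmp += abs(a - b)
    return tmp & 0xffffffff

assert sad_u8(0x01020304, 0x04030201, 0) == 3 + 1 + 1 + 3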
0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -10489,7 +11106,7 @@ def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -10511,7 +11128,7 @@ def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -10533,7 +11150,7 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); @@ -10554,7 +11171,7 @@ def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10566,7 +11183,7 @@ def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 + S2.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -10578,7 +11195,25 @@ def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); + # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); + # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); + # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- 
compiled pseudocode --- + D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) + D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) + D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) + D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10590,7 +11225,7 @@ def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10602,7 +11237,7 @@ def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10614,7 +11249,7 @@ def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10626,7 +11261,7 @@ def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10638,7 +11273,7 @@ def _VOP3Op_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -10650,7 +11285,7 @@ def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -10662,7 +11297,7 @@ def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc 
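V_PERM_B32 above is the instruction this PR fixes: each selector byte of S2 picks one byte of the 64-bit concatenation { S0, S1 } for the corresponding output byte. A minimal illustrative model covering selector values 0-7 (0 = least-significant byte of S1, 7 = most-significant byte of S0); selectors >= 8 have special broadcast/constant encodings in the ISA manual and are deliberately not modeled here:

def perm_b32(s0: int, s1: int, s2: int) -> int:
    data = (s0 << 32) | s1
    out = 0
    for i in range(4):
        sel = (s2 >> (8 * i)) & 0xff
        assert sel <= 7, "constant/broadcast selectors not modeled"
        out |= ((data >> (8 * sel)) & 0xff) << (8 * i)
    return out

# Byte-reverse S1 by selecting its bytes 3,2,1,0:
assert perm_b32(0, 0x11223344, 0x00010203) == 0x44332211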
& 1} return result -def _VOP3Op_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10674,7 +11309,7 @@ def _VOP3Op_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -10686,7 +11321,7 @@ def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -10698,7 +11333,7 @@ def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then @@ -10725,7 +11360,7 @@ def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then # D0.i16 = v_max_i16(S1.i16, S2.i16) # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then @@ -10748,7 +11383,7 @@ def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then # D0.u16 = v_max_u16(S1.u16, S2.u16) # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then @@ -10771,7 +11406,7 @@ def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 * S1.i16 + S2.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -10783,7 +11418,7 @@ def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} 
return result -def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f16) ^ sign(S2.f16)); # if isNAN(64'F(S2.f16)) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) @@ -10828,7 +11463,7 @@ def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10840,7 +11475,7 @@ def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10852,7 +11487,7 @@ def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10864,7 +11499,7 @@ def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32 | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10876,7 +11511,7 @@ def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10888,7 +11523,7 @@ def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -10900,7 +11535,7 @@ def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
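The fused integer ALU ops above (V_ADD3_U32, V_LSHL_OR_B32, V_AND_OR_B32, and the earlier V_XAD_U32 / V_LSHL_ADD_U32 / V_ADD_LSHL_U32) each collapse two common ALU steps into one instruction. A minimal illustrative sketch of three of them, masked to 32 bits:

M32 = 0xffffffff

def add3_u32(s0: int, s1: int, s2: int) -> int:
    return (s0 + s1 + s2) & M32

def lshl_or_b32(s0: int, s1: int, s2: int) -> int:
    return ((s0 << (s1 & 31)) | s2) & M32

def and_or_b32(s0: int, s1: int, s2: int) -> int:
    return ((s0 & s1) | s2) & M32

assert lshl_or_b32(1, 4, 0xf) == 0x1f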
src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = VCC.u64[laneId] ? S1.u16 : S0.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -10914,7 +11549,7 @@ def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_MAXMIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_f32(v_max_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10926,7 +11561,7 @@ def _VOP3Op_V_MAXMIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_f32(v_min_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10938,7 +11573,7 @@ def _VOP3Op_V_MINMAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_f16(v_max_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10950,7 +11585,7 @@ def _VOP3Op_V_MAXMIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_f16(v_min_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10962,7 +11597,7 @@ def _VOP3Op_V_MINMAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10974,7 +11609,7 @@ def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10986,7 +11621,7 @@ def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10998,7 +11633,7 @@ def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -11010,7 +11645,7 @@ def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f16; # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; @@ -11029,7 +11664,7 @@ def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.bf16; # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; @@ -11048,7 +11683,7 @@ def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 + S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11059,7 +11694,7 @@ def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 - S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11070,7 +11705,7 @@ def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11081,7 +11716,7 @@ def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); @@ -11095,7 +11730,7 @@ def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
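V_DOT2_F16_F16 above accumulates a two-element half-precision dot product into S2. A minimal sketch using struct's half-precision codec; rounding every step back to f16 is a simplifying assumption here (the hardware's intermediate precision is not spelled out in the pseudocode), and these helpers are illustrative only:

import struct

def f16(bits: int) -> float:
    return struct.unpack('<e', struct.pack('<H', bits))[0]

def to_f16(x: float) -> int:
    return struct.unpack('<H', struct.pack('<e', x))[0]

def dot2_f16_f16(s0: int, s1: int, s2: int) -> int:
    tmp = f16(s2 & 0xffff)
    tmp += f16(s0 & 0xffff) * f16(s1 & 0xffff)
    tmp += f16(s0 >> 16) * f16(s1 >> 16)
    return to_f16(tmp)

one, two = 0x3c00, 0x4000                       # 1.0 and 2.0 in f16
s = (two << 16) | one                           # packed [1.0, 2.0]
assert f16(dot2_f16_f16(s, s, one)) == 6.0      # 1 + 1*1 + 2*2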
src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); @@ -11109,7 +11744,7 @@ def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11120,7 +11755,7 @@ def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11131,7 +11766,7 @@ def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11142,7 +11777,7 @@ def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 < S1.i16 ? 
S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11153,7 +11788,7 @@ def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 + S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11164,7 +11799,7 @@ def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 - S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11175,7 +11810,7 @@ def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[31 : 16].f16 = S1.f16; # D0[15 : 0].f16 = S0.f16 S0 = Reg(s0) @@ -11188,7 +11823,7 @@ def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); @@ -11202,7 +11837,7 @@ def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); @@ -11216,7 +11851,7 @@ def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * 2.0F ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11227,7 +11862,7 @@ def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11238,7 +11873,7 @@ def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def 
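V_PACK_B32_F16 above is a raw bit pack: S1's f16 pattern lands in the high half and S0's in the low half (the neighboring CVT_PK_NORM ops do the same pack after a normalize-and-convert step). A one-line illustrative sketch:

def pack_b32_f16(s0: int, s1: int) -> int:
    return ((s1 & 0xffff) << 16) | (s0 & 0xffff)

assert pack_b32_f16(0x3c00, 0xc000) == 0xc0003c00   # packs (1.0, -2.0)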
_VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32; # for i in 0 : 31 do # tmp += S0[i].u32; @@ -11258,7 +11893,7 @@ def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); @@ -11272,7 +11907,7 @@ def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); @@ -11286,7 +11921,7 @@ def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = u32_to_u16(S0.u32); # tmp[31 : 16].u16 = u32_to_u16(S1.u32); @@ -11300,7 +11935,7 @@ def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = i32_to_i16(S0.i32); # tmp[31 : 16].i16 = i32_to_i16(S1.i32); @@ -11314,7 +11949,7 @@ def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11325,7 +11960,7 @@ def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 + S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11336,7 +11971,7 @@ def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, 
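The loop in V_BCNT_U32_B32 above is a population count of S0 added to S1. A minimal illustrative equivalent:

def bcnt_u32_b32(s0: int, s1: int) -> int:
    return (s1 + bin(s0 & 0xffffffff).count('1')) & 0xffffffff

assert bcnt_u32_b32(0b1011, 10) == 13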
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -11348,7 +11983,7 @@ def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -11360,7 +11995,7 @@ def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(S0.f64) then @@ -11421,7 +12056,7 @@ def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(S0.f64) then @@ -11482,7 +12117,7 @@ def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * 2.0 ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11494,7 +12129,7 @@ def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 * S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -11505,7 +12140,7 @@ def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -11516,7 +12151,7 @@ def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -11527,7 +12162,7 @@ def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def 
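V_MUL_HI_U32 / V_MUL_HI_I32 above return the high 32 bits of the widened 64-bit product; the signed variant sign-extends both operands first. A minimal illustrative sketch:

def mul_hi_u32(s0: int, s1: int) -> int:
    return ((s0 * s1) >> 32) & 0xffffffff

def mul_hi_i32(s0: int, s1: int) -> int:
    def sext(v: int) -> int:
        return v - (1 << 32) if v & 0x80000000 else v
    return ((sext(s0) * sext(s1)) >> 32) & 0xffffffff

assert mul_hi_u32(0xffffffff, 0xffffffff) == 0xfffffffe
assert mul_hi_i32(0xffffffff, 0xffffffff) == 0   # (-1) * (-1) = 1, high half 0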
_VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11538,7 +12173,7 @@ def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11549,7 +12184,7 @@ def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11560,7 +12195,7 @@ def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 << S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11572,7 +12207,7 @@ def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11584,7 +12219,7 @@ def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (S1.i64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11596,7 +12231,7 @@ def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE32 then # lane = S1.u32[4 : 0].u32; @@ -11619,7 +12254,7 @@ def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 & S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11630,7 +12265,7 @@ def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, 
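Note the operand order in the *REV shifts above: the first source is the shift amount and the second is the value being shifted (reversed relative to the scalar shifts, which lets the amount sit in src0). A one-line illustrative sketch of the 64-bit case:

def lshlrev_b64(s0: int, s1: int) -> int:
    # s0 is the shift amount (low 6 bits), s1 is the data
    return (s1 << (s0 & 63)) & 0xffffffffffffffff

assert lshlrev_b64(8, 0xff) == 0xff00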
exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 | S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11641,7 +12276,7 @@ def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 ^ S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11913,6 +12548,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_FRACT_F16: _VOP3Op_V_FRACT_F16, VOP3Op.V_SIN_F16: _VOP3Op_V_SIN_F16, VOP3Op.V_COS_F16: _VOP3Op_V_COS_F16, + VOP3Op.V_SAT_PK_U8_I16: _VOP3Op_V_SAT_PK_U8_I16, VOP3Op.V_CVT_NORM_I16_F16: _VOP3Op_V_CVT_NORM_I16_F16, VOP3Op.V_CVT_NORM_U16_F16: _VOP3Op_V_CVT_NORM_U16_F16, VOP3Op.V_NOT_B16: _VOP3Op_V_NOT_B16, @@ -11995,6 +12631,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MQSAD_U32_U8: _VOP3Op_V_MQSAD_U32_U8, VOP3Op.V_XOR3_B32: _VOP3Op_V_XOR3_B32, VOP3Op.V_MAD_U16: _VOP3Op_V_MAD_U16, + VOP3Op.V_PERM_B32: _VOP3Op_V_PERM_B32, VOP3Op.V_XAD_U32: _VOP3Op_V_XAD_U32, VOP3Op.V_LSHL_ADD_U32: _VOP3Op_V_LSHL_ADD_U32, VOP3Op.V_ADD_LSHL_U32: _VOP3Op_V_ADD_LSHL_U32, @@ -12070,7 +12707,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_XOR_B16: _VOP3Op_V_XOR_B16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -12090,7 +12727,7 @@ def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12110,7 +12747,7 @@ def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
@@ -12130,7 +12767,7 @@ def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # D0.f32 = NAN.f32 @@ -12193,7 +12830,7 @@ def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then # D0.f64 = NAN.f64 @@ -12257,7 +12894,7 @@ def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3SDOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) S0 = Reg(s0) S1 = Reg(s1) @@ -12274,7 +12911,7 @@ def _VOP3SDOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d1'] = D1._val & 1 return result -def _VOP3SDOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) S0 = Reg(s0) S1 = Reg(s1) @@ -12291,7 +12928,7 @@ def _VOP3SDOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d1'] = D1._val & 1 return result -def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -12311,7 +12948,7 @@ def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12331,7 +12968,7 @@ def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32; # VCC.u64[laneId] = S0.u32 > S1.u32 ? 
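V_MAD_U64_U32 / V_MAD_I64_I32 above compute a 65-bit multiply-add, writing the low 64 bits to D0 and the overflow bit to D1. A minimal illustrative sketch of the unsigned form, relying on Python's arbitrary-precision ints:

def mad_u64_u32(s0: int, s1: int, s2: int):
    full = s0 * s1 + s2                 # at most 65 bits for 32x32 + 64 inputs
    return (full >> 64) & 1, full & 0xffffffffffffffff   # (d1, d0)

d1, d0 = mad_u64_u32(0xffffffff, 0xffffffff, 0xffffffffffffffff)
assert d1 == 1 and d0 == 0xfffffffe00000000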
1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12364,7 +13001,7 @@ VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12381,7 +13018,7 @@ def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12397,7 +13034,7 @@ def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12413,7 +13050,7 @@ def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12429,7 +13066,7 @@ def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12445,7 +13082,7 @@ def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12461,7 +13098,7 @@ def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): 
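The VOP3SD carry ops above (V_ADD_CO_U32, V_SUB_CO_U32 and the _CI_ variants that also consume a carry-in) compute a widened result and write the per-lane carry/borrow into VCC. A minimal illustrative sketch of the carry-in-free pair:

def add_co_u32(s0: int, s1: int):
    tmp = s0 + s1
    return tmp & 0xffffffff, 1 if tmp >= 1 << 32 else 0   # (d0, vcc_lane)

def sub_co_u32(s0: int, s1: int):
    return (s0 - s1) & 0xffffffff, 1 if s1 > s0 else 0    # borrow out

assert add_co_u32(0xffffffff, 1) == (0, 1)
assert sub_co_u32(0, 1) == (0xffffffff, 1)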
+def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12477,7 +13114,7 @@ def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12493,7 +13130,7 @@ def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12509,7 +13146,7 @@ def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12526,7 +13163,7 @@ def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12542,7 +13179,7 @@ def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12558,7 +13195,7 @@ def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? 
S0[31 : 16].u16 : S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12574,7 +13211,7 @@ def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12590,7 +13227,7 @@ def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); @@ -12608,7 +13245,7 @@ def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; # D0.b32 = tmp.b32 @@ -12624,7 +13261,7 @@ def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; # D0.b32 = tmp.b32 @@ -12640,7 +13277,7 @@ def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); # D0.b32 = tmp.b32 @@ -12656,7 +13293,7 @@ def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); # D0.b32 = tmp.b32 @@ -12672,7 +13309,7 @@ def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -12691,7 +13328,7 @@ def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); @@ -12714,7 +13351,7 @@ def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); @@ -12745,7 +13382,7 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -12790,13 +13427,14 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_DOT2_F32_BF16: _VOP3POp_V_DOT2_F32_BF16, } -def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -12804,9 +13442,11 @@ def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. 
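# Editor's note on the new `pc` plumbing above: PC._val is held as an unsigned
# 64-bit quantity, so the two added lines fold it back to a signed Python int
# before reporting it as 'new_pc'. A minimal standalone sketch of that two's-
# complement reinterpretation (the helper name `sext64` is illustrative, not
# from this patch):
def sext64(v: int) -> int:
    """Reinterpret an unsigned 64-bit value as signed two's complement."""
    v &= (1 << 64) - 1                              # clamp to 64 bits
    return v - (1 << 64) if v >= (1 << 63) else v
assert sext64(0x1000) == 0x1000                     # small addresses unchanged
assert sext64(0xFFFFFFFFFFFFFF00) == -256           # high bit set -> negative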
@@ -12815,6 +13455,7 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- @@ -12822,9 +13463,11 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. @@ -12833,6 +13476,7 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- @@ -12840,9 +13484,11 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12850,6 +13496,7 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- @@ -12857,9 +13504,11 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. 
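# These VOPC handlers run once per lane and hand back a single 'vcc_lane' bit;
# the wave-wide mask is assembled by whatever drives them. A toy sketch of that
# fold, assuming a 64-wide wave (the `run_vopc_wave` driver is an illustrative
# assumption, not the emu.py loop):
def run_vopc_wave(cmp_fn, s0_vals, s1_vals, exec_mask):
    vcc = 0
    for lane in range(64):
        if not (exec_mask >> lane) & 1:
            continue                                # inactive lanes leave their bit 0
        if cmp_fn(s0_vals[lane], s1_vals[lane]):
            vcc |= 1 << lane                        # set this lane's condition bit
    return vcc
mask = run_vopc_wave(lambda a, b: a < b, list(range(64)), [32] * 64, (1 << 64) - 1)
assert mask == (1 << 32) - 1                        # lanes 0..31 satisfy a < 32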
@@ -12868,6 +13517,7 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- @@ -12875,9 +13525,11 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12885,6 +13537,7 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- @@ -12892,9 +13545,11 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12902,6 +13557,7 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- @@ -12909,9 +13565,11 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
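# V_CMP_O ("orderable") and V_CMP_U ("unordered") are exact complements: O is
# true iff neither input is NaN, U iff at least one is. A quick standalone
# check of that identity using host floats:
import math
def cmp_o(a, b): return not math.isnan(a) and not math.isnan(b)
def cmp_u(a, b): return math.isnan(a) or math.isnan(b)
_nan = float("nan")
for _a, _b in [(1.0, 2.0), (_nan, 2.0), (1.0, _nan), (_nan, _nan)]:
    assert cmp_o(_a, _b) != cmp_u(_a, _b)           # always complementary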
@@ -12920,6 +13578,7 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- @@ -12927,9 +13586,11 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -12938,6 +13599,7 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- @@ -12945,9 +13607,11 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -12956,6 +13620,7 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- @@ -12963,9 +13628,11 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
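# The "With NAN inputs this is not the same operation as <" comments in the
# NGE/NLG/NGT/... family are easy to verify with host floats: every ordered
# comparison against NaN is false, so the negated compare flips to true exactly
# on unordered inputs.
_nan = float("nan")
assert (not (_nan >= 1.0)) is True                  # NGE: true for a NaN operand
assert (_nan < 1.0) is False                        # LT: false for a NaN operand
assert (not (0.5 >= 1.0)) == (0.5 < 1.0)            # ordered inputs agree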
@@ -12974,6 +13641,7 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- @@ -12981,9 +13649,11 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -12993,6 +13663,7 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- @@ -13000,9 +13671,11 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -13011,6 +13684,7 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- @@ -13018,9 +13692,11 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -13030,6 +13706,7 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- @@ -13037,9 +13714,11 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -13049,6 +13728,7 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- @@ -13056,15 +13736,18 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -13072,15 +13755,18 @@ def _VOPCOp_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
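# V_CMP_F and V_CMP_T write a constant 0 or 1 per active lane, so across a
# full wave they materialize the empty mask and (effectively) the EXEC mask.
# A toy model of that, under the same 64-lane assumption as above:
def vcmp_const_mask(bit, exec_mask):
    out = 0
    for lane in range(64):
        if (exec_mask >> lane) & 1:
            out |= bit << lane                      # constant result per active lane
    return out
assert vcmp_const_mask(0, 0xFF) == 0                # V_CMP_F: clears
assert vcmp_const_mask(1, 0xFF) == 0xFF             # V_CMP_T: fills active lanes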
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -13088,9 +13774,11 @@ def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -13099,6 +13787,7 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- @@ -13106,9 +13795,11 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -13117,6 +13808,7 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- @@ -13124,9 +13816,11 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. 
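# `Reg(...).f32` is a typed view over raw register bits. A minimal stdlib
# equivalent of that reinterpretation (a sketch, not the Reg class from
# pcode.py):
import math, struct
def bits_to_f32(u32: int) -> float:
    return struct.unpack("<f", struct.pack("<I", u32 & 0xFFFFFFFF))[0]
assert bits_to_f32(0x3F800000) == 1.0
assert math.isclose(bits_to_f32(0x40490FDB), math.pi, rel_tol=1e-6)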
S0 = Reg(s0) @@ -13134,6 +13828,7 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- @@ -13141,9 +13836,11 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -13152,6 +13849,7 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- @@ -13159,9 +13857,11 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13169,6 +13869,7 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- @@ -13176,9 +13877,11 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13186,6 +13889,7 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- @@ -13193,9 +13897,11 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -13204,6 +13910,7 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- @@ -13211,9 +13918,11 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -13222,6 +13931,7 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- @@ -13229,9 +13939,11 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. 
@@ -13240,6 +13952,7 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- @@ -13247,9 +13960,11 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -13258,6 +13973,7 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- @@ -13265,9 +13981,11 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -13277,6 +13995,7 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- @@ -13284,9 +14003,11 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. 
@@ -13295,6 +14016,7 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- @@ -13302,9 +14024,11 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -13314,6 +14038,7 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- @@ -13321,9 +14046,11 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -13333,6 +14060,7 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- @@ -13340,15 +14068,18 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
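# Each handler returns a small result dict ('d0'/'d0_64', 'scc', 'vcc_lane',
# and now 'new_pc') instead of mutating state in place. A toy consumer under
# that assumed contract (the `apply_result` name and the state layout are
# illustrative, not emu.py's):
def apply_result(state, lane, result):
    if 'vcc_lane' in result:                        # update only this lane's VCC bit
        bit = 1 << lane
        state['vcc'] = (state['vcc'] & ~bit) | (result['vcc_lane'] << lane)
    if 'new_pc' in result:
        state['pc'] = result['new_pc']              # absolute byte address
state = {'vcc': 0, 'pc': 0}
apply_result(state, lane=3, result={'vcc_lane': 1, 'new_pc': 0x100})
assert state == {'vcc': 0b1000, 'pc': 0x100}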
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -13356,15 +14087,18 @@ def _VOPCOp_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -13372,9 +14106,11 @@ def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -13383,6 +14119,7 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -13390,9 +14127,11 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. 
@@ -13401,6 +14140,7 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -13408,9 +14148,11 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13418,6 +14160,7 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -13425,9 +14168,11 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -13436,6 +14181,7 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -13443,9 +14189,11 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. 
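# Reading note: the `<>` ("less than or greater than") pseudocode compiles to
# Python's `!=` in these handlers. IEEE LG is an ordered predicate (false when
# either input is NaN), while `!=` on Python floats is unordered-true for NaN,
# so the two differ exactly on NaN inputs:
import math
_nan = float("nan")
def lg_ordered(a, b):                               # strict IEEE LG, for contrast
    return not (math.isnan(a) or math.isnan(b)) and a != b
assert (_nan != 1.0) is True                        # Python !=: unordered
assert lg_ordered(_nan, 1.0) is False               # IEEE LG: ordered
assert lg_ordered(1.0, 2.0) is True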
S0 = Reg(s0) @@ -13453,6 +14201,7 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -13460,9 +14209,11 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13470,6 +14221,7 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -13477,9 +14229,11 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -13488,6 +14242,7 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -13495,9 +14250,11 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
@@ -13506,6 +14263,7 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -13513,9 +14271,11 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -13524,6 +14284,7 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -13531,9 +14292,11 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -13542,6 +14305,7 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -13549,9 +14313,11 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -13561,6 +14327,7 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -13568,9 +14335,11 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -13579,6 +14348,7 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -13586,9 +14356,11 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -13598,6 +14370,7 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -13605,9 +14378,11 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -13617,6 +14392,7 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -13624,15 +14400,18 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -13640,9 +14419,11 @@ def _VOPCOp_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -13651,6 +14432,7 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -13658,9 +14440,11 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -13669,6 +14453,7 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -13676,9 +14461,11 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13686,6 +14473,7 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -13693,9 +14481,11 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -13704,6 +14494,7 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -13711,9 +14502,11 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
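Every VOPC handler in this patch now ends with the same two added lines, which fold the 64-bit PC register value back into a signed Python int before reporting it as result['new_pc']. A minimal sketch of the equivalent helper (illustration only; the helper name is not from the patch):

def to_signed64(v: int) -> int:
    # Reinterpret the low 64 bits of v as a two's-complement signed int.
    v &= 0xFFFFFFFFFFFFFFFF
    return v - 0x10000000000000000 if v >= 0x8000000000000000 else v

assert to_signed64(0x1000) == 0x1000            # small forward address
assert to_signed64(0xFFFFFFFFFFFFFFF8) == -8    # wrapped/negative value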
@@ -13722,6 +14515,7 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -13729,9 +14523,11 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13739,6 +14535,7 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -13746,9 +14543,11 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -13757,6 +14556,7 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -13764,9 +14564,11 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
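The .i16 views in the compares above read the low 16 bits of the source as a signed two's-complement field, unlike the .u16 views that follow; presumably the Reg class from pcode.py implements these typed accesses. A generic sketch of the reinterpretation, parameterized by width so the same idea covers the i32 and i64 variants later in this file (illustration only):

def sign_extend(v: int, bits: int) -> int:
    # Interpret the low `bits` bits of v as a signed two's-complement field.
    v &= (1 << bits) - 1
    return v - (1 << bits) if v & (1 << (bits - 1)) else v

# The raw bits 0xFFFF are 65535 as .u16 but -1 as .i16, so signed and
# unsigned compares can disagree on identical register contents:
assert sign_extend(0xFFFF, 16) == -1
assert sign_extend(0xFFFF, 16) < 0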
@@ -13775,6 +14577,7 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -13782,9 +14585,11 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13792,6 +14597,7 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -13799,9 +14605,11 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -13810,6 +14618,7 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -13817,9 +14626,11 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -13828,6 +14639,7 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -13835,9 +14647,11 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13845,6 +14659,7 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -13852,15 +14667,18 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -13868,9 +14686,11 @@ def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. 
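Each of these compares writes a single bit of a 64-bit wave mask: D0.u64[laneId] = cond sets or clears bit laneId of D0, and the handler then extracts that same bit for result['vcc_lane']. In plain integer terms, a sketch of the bit-slice assignment (not code from the patch):

def set_lane(mask: int, lane: int, cond: bool) -> int:
    # Set or clear bit `lane` of a 64-bit per-lane mask.
    bit = 1 << lane
    return (mask | bit) if cond else (mask & ~bit) & 0xFFFFFFFFFFFFFFFF

m = set_lane(0, 3, True)
assert (m >> 3) & 1 == 1    # matches the (D0._val >> lane) & 1 extraction
assert set_lane(m, 3, False) == 0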
@@ -13879,6 +14699,7 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -13886,9 +14707,11 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. @@ -13897,6 +14720,7 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -13904,9 +14728,11 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13914,6 +14740,7 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -13921,9 +14748,11 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -13932,6 +14761,7 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -13939,9 +14769,11 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. @@ -13950,6 +14782,7 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -13957,9 +14790,11 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13967,6 +14802,7 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -13974,15 +14810,18 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -13990,15 +14829,18 @@ def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -14006,9 +14848,11 @@ def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. @@ -14017,6 +14861,7 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -14024,9 +14869,11 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. 
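The result dict these handlers build is per lane: because D0 aliases VCC in the VOPC encoding, result['vcc_lane'] is recomputed from the D0 bit unconditionally, and the caller is expected to fold it into the wave's VCC mask for the current lane (presumably in emu.py, which this patch also touches). A hedged sketch of that consumer side; WaveState and apply_vopc_result are hypothetical names for illustration only:

class WaveState:
    # Hypothetical container for per-wave state; not from the patch.
    def __init__(self):
        self.vcc = 0

def apply_vopc_result(wave: WaveState, lane: int, result: dict) -> None:
    # Fold one lane's compare bit into the wave's 64-bit VCC mask.
    if 'vcc_lane' in result:
        bit = 1 << lane
        wave.vcc = (wave.vcc | bit) if result['vcc_lane'] else (wave.vcc & ~bit)

wave = WaveState()
apply_vopc_result(wave, 5, {'vcc_lane': 1})
assert (wave.vcc >> 5) & 1 == 1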
@@ -14035,6 +14882,7 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -14042,9 +14890,11 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14052,6 +14902,7 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -14059,9 +14910,11 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. @@ -14070,6 +14923,7 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -14077,9 +14931,11 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -14088,6 +14944,7 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -14095,9 +14952,11 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14105,6 +14964,7 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -14112,15 +14972,18 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -14128,15 +14991,18 @@ def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -14144,9 +15010,11 @@ def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -14155,6 +15023,7 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -14162,9 +15031,11 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -14173,6 +15044,7 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -14180,9 +15052,11 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -14190,6 +15064,7 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -14197,9 +15072,11 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -14208,6 +15085,7 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -14215,9 +15093,11 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -14226,6 +15106,7 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -14233,9 +15114,11 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -14243,6 +15126,7 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -14250,15 +15134,18 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -14266,15 +15153,18 @@ def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -14282,9 +15172,11 @@ def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -14293,6 +15185,7 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -14300,9 +15193,11 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -14311,6 +15206,7 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- @@ -14318,9 +15214,11 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14328,6 +15226,7 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- @@ -14335,9 +15234,11 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -14346,6 +15247,7 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- @@ -14353,9 +15255,11 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -14364,6 +15268,7 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- @@ -14371,9 +15276,11 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14381,6 +15288,7 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- @@ -14388,15 +15296,18 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -14404,9 +15315,11 @@ def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -14443,6 +15356,7 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -14462,9 +15376,11 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -14501,6 +15417,7 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -14520,9 +15437,11 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
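V_CMP_CLASS_* compares S0 against a class mask in S1 rather than against a second value: each S1 bit selects one floating-point class (bit 0 for signaling NaN and bit 1 for quiet NaN per the comments above, with further bits covering infinities, normals, denormals and zeros in the full pseudocode), and the lane result is the OR of the selected tests. A minimal sketch of just the two NaN bits (illustration only; F32 bit layout assumed):

import math, struct

def is_snan_f32(bits: int) -> bool:
    # Signaling NaN: exponent all ones, nonzero mantissa, quiet bit clear.
    exp, mant = (bits >> 23) & 0xFF, bits & 0x7FFFFF
    return exp == 0xFF and mant != 0 and not (mant >> 22) & 1

def cmp_class_nan_bits(x_bits: int, mask: int) -> bool:
    x = struct.unpack('<f', struct.pack('<I', x_bits))[0]
    if is_snan_f32(x_bits): return bool(mask & 1)   # S1.u[0]: signaling NaN
    if math.isnan(x):       return bool(mask & 2)   # S1.u[1]: quiet NaN
    return False  # remaining class bits omitted from this sketch

assert cmp_class_nan_bits(0x7FC00000, 0b10)   # quiet NaN matches bit 1
assert cmp_class_nan_bits(0x7F800001, 0b01)   # signaling NaN matches bit 0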
@@ -14559,6 +15478,7 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -14578,9 +15498,11 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -14591,7 +15513,7 @@ def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14604,7 +15526,7 @@ def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 S0 = Reg(s0) @@ -14618,7 +15540,7 @@ def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14631,7 +15553,7 @@ def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14644,7 +15566,7 @@ def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14657,7 +15579,7 @@ def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14670,7 +15592,7 @@ def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -14683,7 +15605,7 @@ def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -14696,7 +15618,7 @@ def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -14710,7 +15632,7 @@ def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -14724,7 +15646,7 @@ def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -14738,7 +15660,7 @@ def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -14752,7 +15674,7 @@ def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -14766,7 +15688,7 @@ def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -14780,7 +15702,7 @@ def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -14791,7 +15713,7 @@ def _VOPCOp_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -14802,7 +15724,7 @@ def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP 
result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14815,7 +15737,7 @@ def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 S0 = Reg(s0) @@ -14829,7 +15751,7 @@ def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14842,7 +15764,7 @@ def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14855,7 +15777,7 @@ def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14868,7 +15790,7 @@ def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14881,7 +15803,7 @@ def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -14894,7 +15816,7 @@ def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -14907,7 +15829,7 @@ def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -14921,7 +15843,7 @@ def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -14935,7 +15857,7 @@ def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -14949,7 +15871,7 @@ def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -14963,7 +15885,7 @@ def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -14977,7 +15899,7 @@ def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -14991,7 +15913,7 @@ def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def 
_VOPCOp_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15002,7 +15924,7 @@ def _VOPCOp_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15013,7 +15935,7 @@ def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 < S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15026,7 +15948,7 @@ def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 S0 = Reg(s0) @@ -15040,7 +15962,7 @@ def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15053,7 +15975,7 @@ def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 > S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15066,7 +15988,7 @@ def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <> S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15079,7 +16001,7 @@ def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 >= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15092,7 +16014,7 @@ def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -15105,7 +16027,7 @@ def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -15118,7 +16040,7 @@ def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -15132,7 +16054,7 @@ def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -15146,7 +16068,7 @@ def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -15160,7 +16082,7 @@ def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -15174,7 +16096,7 @@ def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -15188,7 +16110,7 @@ def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -15202,7 +16124,7 @@ def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15213,7 +16135,7 @@ def _VOPCOp_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 < S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15226,7 +16148,7 @@ def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 S0 = Reg(s0) @@ -15240,7 +16162,7 @@ def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15253,7 +16175,7 @@ def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 > S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15266,7 +16188,7 @@ def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <> S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15279,7 +16201,7 @@ def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 >= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15292,7 +16214,7 @@ def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 < S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15305,7 +16227,7 @@ def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 S0 = Reg(s0) @@ -15319,7 +16241,7 @@ def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15332,7 +16254,7 @@ def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 > S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15345,7 +16267,7 @@ def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <> S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15358,7 +16280,7 @@ def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 >= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15371,7 +16293,7 @@ def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15382,7 +16304,7 @@ def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15395,7 +16317,7 @@ def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 S0 = Reg(s0) @@ -15409,7 +16331,7 @@ def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15422,7 +16344,7 @@ def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15435,7 +16357,7 @@ def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15448,7 +16370,7 @@ def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15461,7 +16383,7 @@ def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15472,7 +16394,7 @@ def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15483,7 +16405,7 @@ def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15496,7 +16418,7 @@ def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 S0 = Reg(s0) @@ -15510,7 +16432,7 @@ def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15523,7 +16445,7 @@ def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15536,7 +16458,7 @@ def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15549,7 +16471,7 @@ def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15562,7 +16484,7 @@ def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15573,7 +16495,7 @@ def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15584,7 +16506,7 @@ def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 < S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15597,7 +16519,7 @@ def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 S0 = Reg(s0) @@ -15611,7 +16533,7 @@ def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15624,7 +16546,7 @@ def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 > S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15637,7 +16559,7 @@ def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <> S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15650,7 +16572,7 @@ def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 >= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15663,7 +16585,7 @@ def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15674,7 +16596,7 @@ def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15685,7 +16607,7 @@ def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 < S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15698,7 +16620,7 @@ def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 S0 = Reg(s0) @@ -15712,7 +16634,7 @@ def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15725,7 +16647,7 @@ def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 > S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15738,7 +16660,7 @@ def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15751,7 +16673,7 @@ def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 >= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15764,7 +16686,7 @@ def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15775,7 +16697,7 @@ def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -15828,7 +16750,7 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -15881,7 +16803,7 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. diff --git a/extra/assembly/amd/autogen/rdna4/gen_pcode.py b/extra/assembly/amd/autogen/rdna4/gen_pcode.py index e7cc670a9b..15a92ee453 100644 --- a/extra/assembly/amd/autogen/rdna4/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna4/gen_pcode.py @@ -5,7 +5,7 @@ from extra.assembly.amd.autogen.rdna4 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -15,7 +15,7 @@ def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b64 = S0.b64 S0 = Reg(s0) D0 = Reg(d0) @@ -26,7 +26,7 @@ def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b32 = S0.b32 # endif @@ -40,7 +40,7 @@ def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b64 = S0.b64 # endif @@ -55,7 +55,7 @@ def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -65,7 +65,7 @@ def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[63 : 0] = S0.u64[0 : 63] S0 = Reg(s0) D0 = Reg(d0) @@ -76,7 +76,7 @@ def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -99,7 +99,7 @@ def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -122,7 +122,7 @@ def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -145,7 +145,7 @@ def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -168,7 +168,7 @@ def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -191,7 +191,7 @@ def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the same # for i in 1 : 63 do @@ -214,7 +214,7 @@ def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i8)) S0 = Reg(s0) D0 = Reg(d0) @@ -224,7 +224,7 @@ def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -234,7 +234,7 @@ def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[S0.u32[4 : 0]] = 1'0U S0 = Reg(s0) D0 = Reg(d0) @@ -244,7 +244,7 @@ def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[S0.u32[5 : 0]] = 1'0U S0 = Reg(s0) D0 = Reg(d0) @@ -255,7 +255,7 @@ def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[S0.u32[4 : 0]] = 1'1U S0 = Reg(s0) D0 = Reg(d0) @@ -265,7 +265,7 @@ def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[S0.u32[5 : 0]] = 1'1U S0 = Reg(s0) D0 = Reg(d0) @@ -276,7 +276,7 @@ def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32; # for i in 0 : 31 do # D0.u64[i * 2] = tmp[i]; @@ -295,7 +295,7 @@ def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < 0 ? 
-S0.i32 : S0.i32; # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -308,7 +308,7 @@ def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'0U ? 1 : 0 @@ -329,7 +329,7 @@ def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'0U ? 1 : 0 @@ -351,7 +351,7 @@ def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'1U ? 1 : 0 @@ -372,7 +372,7 @@ def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'1U ? 
1 : 0 @@ -394,7 +394,7 @@ def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # for i in 0 : 7 do # tmp[i] = S0.u32[i * 4 +: 4] != 0U @@ -415,7 +415,7 @@ def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0ULL; # for i in 0 : 15 do # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL @@ -437,7 +437,7 @@ def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # declare i : 6'U; # for i in 6'0U : 6'31U do @@ -459,7 +459,7 @@ def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0ULL; # declare i : 6'U; # for i in 6'0U : 6'63U do @@ -482,7 +482,7 @@ def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32; # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -495,7 +495,7 @@ def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~S0.u64; # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -509,7 +509,7 @@ def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -531,7 +531,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -554,7 +554,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u32; @@ -576,7 +576,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u64; @@ -599,7 +599,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -621,7 +621,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -644,7 +644,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated 
result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -666,7 +666,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -689,7 +689,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -711,7 +711,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -734,7 +734,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -756,7 +756,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -779,7 +779,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u32; @@ -801,7 +801,7 @@ def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -824,7 +824,7 @@ def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result['d0_64'] = True return result -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u32; @@ -846,7 +846,7 @@ def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -869,7 +869,7 @@ def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u32; @@ -891,7 +891,7 @@ def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise 
AND on the scalar input and the negation of the EXEC mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -914,7 +914,7 @@ def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result['d0_64'] = True return result -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u32; @@ -936,7 +936,7 @@ def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -959,7 +959,7 @@ def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -979,7 +979,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
This instruction is @@ -1000,7 +1000,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -1020,7 +1020,7 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -1041,9 +1041,65 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result['d0_64'] = True return result -def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0.i64 = PC + 4LL + D0 = Reg(d0) + PC = Reg(pc) + # --- compiled pseudocode --- + D0.i64 = PC + 4 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # PC = S0.i64 + S0 = Reg(s0) + PC = Reg(pc) + # --- compiled pseudocode --- + PC = Reg(S0.i64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # jump_addr = S0.i64; + # D0.i64 = PC + 4LL; + # PC = jump_addr.i64 + S0 = Reg(s0) + D0 = Reg(d0) + PC = Reg(pc) + # --- compiled pseudocode --- + jump_addr = S0.i64 + D0.i64 = PC + 4 + PC = Reg(jump_addr.i64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # PC = S0.i64 + S0 = Reg(s0) + PC = Reg(pc) + # --- compiled pseudocode --- + PC = Reg(S0.i64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = 
PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # If SDST is VCC then VCCZ is undefined. VCC = Reg(vcc) + VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- # --- end pseudocode --- @@ -1051,9 +1107,10 @@ def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # If SDST is VCC then VCCZ is undefined. VCC = Reg(vcc) + VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- # --- end pseudocode --- @@ -1061,7 +1118,7 @@ def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if !InWorkgroup() then # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then # elsif barrierNumber == 0 then @@ -1081,7 +1138,7 @@ def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if !InWorkgroup() then # SCC = 1'0U # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then @@ -1108,7 +1165,7 @@ def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask, result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U({ 9'0, BARRIER_STATE[barrierNumber & 63].signalCnt.u7, 5'0, BARRIER_STATE[barrierNumber & D0 = Reg(d0) # --- compiled pseudocode --- @@ -1117,7 +1174,7 @@ def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # n = ReallocVgprs(32'I(S0[8 : 0].u32)); # if n < 0 then # SCC = 1'0U @@ -1138,7 +1195,7 @@ def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S0[6:0] determines the sleep duration. 
The wave sleeps for (64*(S0[6:0]-1) … 64*S0[6:0]) clocks. The exact S0 = Reg(s0) # --- compiled pseudocode --- @@ -1147,7 +1204,7 @@ def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': scc & 1} return result -def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -1162,7 +1219,7 @@ def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -1177,7 +1234,7 @@ def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -1187,7 +1244,7 @@ def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -1202,7 +1259,7 @@ def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -1212,7 +1269,7 @@ def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -1222,7 +1279,7 @@ def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -1232,7 +1289,7 @@ def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result 
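[reviewer note, not part of the patch: the handlers above now take the wave's `pc` and report control transfers through a `new_pc` key instead of writing PC themselves. A minimal sketch of a consumer, under stated assumptions: `step` and its handler table are illustrative, the fixed 4-byte fall-through ignores 64-bit encodings and trailing literals, and the real dispatch lives in extra/assembly/amd/emu.py.]

def step(handlers, op, args, pc):
    # Every generated handler now accepts pc= and may return 'new_pc' as an
    # absolute byte address. S_GETPC_B64 reports pc unchanged, so treating an
    # unchanged value as fall-through keeps plain ALU ops and GETPC linear,
    # while S_SETPC_B64 / S_SWAPPC_B64 / S_RFE_B64 redirect the wave.
    result = handlers[op](*args, pc=pc)
    new_pc = result.get('new_pc', pc)
    return result, (new_pc if new_pc != pc else pc + 4)  # 4-byte step assumed

[end note]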
-def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -1242,7 +1299,7 @@ def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -1252,7 +1309,7 @@ def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -1262,7 +1319,7 @@ def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0[31 : 16].f16) S0 = Reg(s0) D0 = Reg(d0) @@ -1272,7 +1329,7 @@ def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -1287,7 +1344,7 @@ def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -1302,7 +1359,7 @@ def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -1312,7 +1369,7 @@ def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if 
(isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -1382,6 +1439,10 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_AND_NOT0_WREXEC_B64: _SOP1Op_S_AND_NOT0_WREXEC_B64, SOP1Op.S_AND_NOT1_WREXEC_B32: _SOP1Op_S_AND_NOT1_WREXEC_B32, SOP1Op.S_AND_NOT1_WREXEC_B64: _SOP1Op_S_AND_NOT1_WREXEC_B64, + SOP1Op.S_GETPC_B64: _SOP1Op_S_GETPC_B64, + SOP1Op.S_SETPC_B64: _SOP1Op_S_SETPC_B64, + SOP1Op.S_SWAPPC_B64: _SOP1Op_S_SWAPPC_B64, + SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64, SOP1Op.S_SENDMSG_RTN_B32: _SOP1Op_S_SENDMSG_RTN_B32, SOP1Op.S_SENDMSG_RTN_B64: _SOP1Op_S_SENDMSG_RTN_B64, SOP1Op.S_BARRIER_SIGNAL: _SOP1Op_S_BARRIER_SIGNAL, @@ -1406,7 +1467,7 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16, } -def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1423,7 +1484,7 @@ def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1440,7 +1501,7 @@ def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 + S1.i32; # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -1457,7 +1518,7 @@ def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 - S1.i32; # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -1474,7 +1535,7 @@ def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; # SCC = tmp >= 0x100000000ULL ? 
1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1491,7 +1552,7 @@ def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - SCC.u32; # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1508,7 +1569,7 @@ def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32; # if D0.i32 < 0 then # D0.i32 = -D0.i32 @@ -1527,7 +1588,7 @@ def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1541,7 +1602,7 @@ def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 << S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1556,7 +1617,7 @@ def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 >> S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1570,7 +1631,7 @@ def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 >> S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1585,7 +1646,7 @@ def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -1599,7 +1660,7 @@ def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); # SCC = D0.i64 != 0LL S0 = Reg(s0) @@ -1614,7 +1675,7 @@ def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1631,7 +1692,7 @@ def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1648,7 +1709,7 @@ def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1665,7 +1726,7 @@ def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1682,7 +1743,7 @@ def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 S0 = Reg(s0) @@ -1696,7 +1757,7 @@ def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32; # D0.u32 = SCC ? 
S0.u32 : S1.u32 S0 = Reg(s0) @@ -1710,7 +1771,7 @@ def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 S0 = Reg(s0) @@ -1724,7 +1785,7 @@ def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) @@ -1738,7 +1799,7 @@ def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1752,7 +1813,7 @@ def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1767,7 +1828,7 @@ def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1781,7 +1842,7 @@ def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1796,7 +1857,7 @@ def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result['d0_64'] = True return result -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1810,7 +1871,7 @@ def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1825,7 +1886,7 @@ def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1839,7 +1900,7 @@ def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1854,7 +1915,7 @@ def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1868,7 +1929,7 @@ def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1883,7 +1944,7 @@ def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1897,7 +1958,7 @@ def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1912,7 +1973,7 @@ def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1926,7 +1987,7 @@ def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1941,7 +2002,7 @@ def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1955,7 +2016,7 @@ def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1970,7 +2031,7 @@ def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1984,7 +2045,7 @@ def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); # SCC = D0.i32 != 0 @@ -2001,7 +2062,7 @@ def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -2016,7 +2077,7 @@ def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); # SCC = D0.i64 != 0LL @@ -2034,7 +2095,7 @@ def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 
0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -2045,7 +2106,7 @@ def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -2057,7 +2118,7 @@ def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 * S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2068,7 +2129,7 @@ def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -2079,7 +2140,7 @@ def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -2090,7 +2151,7 @@ def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2102,7 +2163,7 @@ def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = SCC ? 
S0.u64 : S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2115,7 +2176,7 @@ def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2126,7 +2187,7 @@ def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2137,7 +2198,7 @@ def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2148,7 +2209,7 @@ def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2159,7 +2220,7 @@ def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2170,7 +2231,7 @@ def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2181,7 +2242,7 @@ def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -2218,7 +2279,7 @@ def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -2255,7 +2316,7 @@ def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2266,7 +2327,7 @@ def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2278,7 +2339,7 @@ def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2290,7 +2351,7 @@ def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2301,7 +2362,7 @@ def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -2316,7 +2377,7 @@ def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2327,7 +2388,7 @@ def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2338,7 +2399,7 @@ def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -2375,7 +2436,7 @@ def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -2412,7 +2473,7 @@ def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2423,7 +2484,7 @@ def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -2434,7 +2495,7 @@ def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -2475,7 +2536,7 @@ def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -2516,7 +2577,7 @@ def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -2557,7 +2618,7 @@ def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -2598,7 +2659,7 @@ def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = S0.u64 + S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2610,7 +2671,7 @@ def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = S0.u64 - S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2622,7 +2683,7 @@ def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _SOP2Op_S_MUL_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = S0.u64 * S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2711,7 +2772,7 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_MUL_U64: _SOP2Op_S_MUL_U64, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 == S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2722,7 +2783,7 @@ def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2733,7 +2794,7 @@ def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2744,7 +2805,7 @@ def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2755,7 +2816,7 @@ def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2766,7 +2827,7 @@ def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2777,7 +2838,7 @@ def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2788,7 +2849,7 @@ def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2799,7 +2860,7 @@ def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2810,7 +2871,7 @@ def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2821,7 +2882,7 @@ def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2832,7 +2893,7 @@ def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2843,7 +2904,7 @@ def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -2854,7 +2915,7 @@ def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -2865,7 +2926,7 @@ def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -2876,7 +2937,7 @@ def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -2887,7 +2948,7 @@ def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 == S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2898,7 +2959,7 @@ def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2909,7 +2970,7 @@ def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2920,7 +2981,7 @@ def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2931,7 +2992,7 @@ def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def 
_SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 == S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2942,7 +3003,7 @@ def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 == S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2953,7 +3014,7 @@ def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2964,7 +3025,7 @@ def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2975,7 +3036,7 @@ def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2986,7 +3047,7 @@ def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2997,7 +3058,7 @@ def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3008,7 +3069,7 @@ def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3019,7 +3080,7 @@ def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} 
return result -def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3030,7 +3091,7 @@ def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3041,7 +3102,7 @@ def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -3052,7 +3113,7 @@ def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -3063,7 +3124,7 @@ def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -3074,7 +3135,7 @@ def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -3085,7 +3146,7 @@ def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -3097,7 +3158,7 @@ def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 
!(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -3109,7 +3170,7 @@ def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -3121,7 +3182,7 @@ def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -3133,7 +3194,7 @@ def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -3145,7 +3206,7 @@ def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -3157,7 +3218,7 @@ def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -3169,7 +3230,7 @@ def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -3181,7 +3242,7 @@ def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation 
as !=
   S0 = Reg(s0)
@@ -3193,7 +3254,7 @@ def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': d0, 'scc': SCC._val & 1}
   return result
 
-def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # SCC = !(S0.f16 == S1.f16);
   # // With NAN inputs this is not the same operation as !=
   S0 = Reg(s0)
@@ -3205,7 +3266,7 @@ def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': d0, 'scc': SCC._val & 1}
   return result
 
-def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # SCC = !(S0.f32 < S1.f32);
   # // With NAN inputs this is not the same operation as >=
   S0 = Reg(s0)
@@ -3217,7 +3278,7 @@ def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': d0, 'scc': SCC._val & 1}
   return result
 
-def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # SCC = !(S0.f16 < S1.f16);
   # // With NAN inputs this is not the same operation as >=
   S0 = Reg(s0)
@@ -3278,7 +3339,7 @@ SOPCOp_FUNCTIONS = {
   SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16,
 }
 
-def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i16))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -3288,7 +3349,7 @@ def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // Do nothing - for use by tools only
 
   # --- compiled pseudocode ---
@@ -3296,7 +3357,7 @@ def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': d0, 'scc': scc & 1}
   return result
 
-def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if SCC then
   #   D0.i32 = 32'I(signext(S0.i16))
   # endif
@@ -3310,7 +3371,7 @@ def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = D0.i32;
   # D0.i32 = D0.i32 + 32'I(signext(S0.i16));
   # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31]));
@@ -3326,7 +3387,7 @@ def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
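# S_ADDK_CO_I32 above derives SCC from the classic signed-overflow test: the
# add overflows exactly when both addends share a sign bit that differs from
# the result's sign bit. A minimal standalone sketch with plain 32-bit
# two's-complement encodings (the helper name is illustrative, not part of
# the generated file):
def _signed_add_overflows(a: int, b: int) -> bool:
  d = (a + b) & 0xFFFFFFFF  # 32-bit wrapping sum, like D0.i32
  return (a >> 31) == (b >> 31) and (a >> 31) != (d >> 31)

assert _signed_add_overflows(0x7FFFFFFF, 0x00000001)      # INT32_MAX + 1 wraps negative
assert not _signed_add_overflows(0x7FFFFFFF, 0xFFFFFFFF)  # adding -1 cannot overflow here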
-def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = D0.i32 * 32'I(signext(S0.i16))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -3336,15 +3397,32 @@ def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
+def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0.i64 = PC + 4LL;
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  D0 = Reg(d0)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  D0.i64 = PC + 4
+  PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
 SOPKOp_FUNCTIONS = {
   SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32,
   SOPKOp.S_VERSION: _SOPKOp_S_VERSION,
   SOPKOp.S_CMOVK_I32: _SOPKOp_S_CMOVK_I32,
   SOPKOp.S_ADDK_CO_I32: _SOPKOp_S_ADDK_CO_I32,
   SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32,
+  SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64,
 }
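# S_CALL_B64 above is the one SOPK op here that both writes a register and
# redirects control flow: it stores the return address PC+4 into a 64-bit
# destination pair and reports the call target through result['new_pc'].
# A minimal sketch of the address math with plain ints (the helper name and
# sample addresses are assumptions for illustration, not part of the file):
def _s_call_b64_targets(pc: int, simm16: int) -> tuple[int, int]:
  off = simm16 - 0x10000 if simm16 & 0x8000 else simm16  # sign-extend the 16-bit immediate
  ret = (pc + 4) & 0xFFFFFFFFFFFFFFFF                    # return address written to D0
  target = (pc + 4 + off * 4) & 0xFFFFFFFFFFFFFFFF       # word offset scaled to bytes
  return ret, target

assert _s_call_b64_targets(0x100, 2) == (0x104, 0x10C)       # call forward by two words
assert _s_call_b64_targets(0x100, 0xFFFF) == (0x104, 0x100)  # offset -1 word targets the call itself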
 
-def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # for i in 0U : SIMM16.u16[3 : 0].u32 do
   # endfor
   SIMM16 = Reg(literal)
@@ -3355,7 +3433,7 @@ def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _var
   result = {'d0': d0, 'scc': scc & 1}
   return result
 
-def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # instruction may be omitted. For wave64 the compiler may not know the status of the EXEC mask and hence
   # // 1 cycle delay here
   # // 2 cycles delay here
@@ -3367,16 +3445,20 @@ def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // PC passed into trap handler points to S_TRAP itself,
+  # PC = TBA.i64; # // trap base address
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // barrierBit 0: reserved
   # // barrierBit 1: workgroup
   # // barrierBit 2: trap
@@ -3387,14 +3469,163 @@ def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': d0, 'scc': scc & 1}
   return result
 
+def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL;
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if SCC == 1'0U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  SCC = Reg(scc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if SCC == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': SCC._val & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if SCC == 1'1U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  SCC = Reg(scc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  if SCC == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': SCC._val & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
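# Every PC-writing handler above funnels the 64-bit Reg value through the same
# two's-complement fold before returning it as result['new_pc']. A sketch of
# that fold plus a hypothetical fetch loop consuming it; the real dispatch
# lives in extra/assembly/amd/emu.py (not shown in this hunk), and the fixed
# 4-byte fall-through below is an assumption of this sketch only.
def _to_signed64(v: int) -> int:
  return v if v < 0x8000000000000000 else v - 0x10000000000000000

assert _to_signed64(0x10C) == 0x10C
assert _to_signed64(0xFFFFFFFFFFFFFFFC) == -4  # a branch past address 0 folds negative

def _run(handlers_by_addr: dict, pc: int = 0) -> int:
  # handlers_by_addr: {byte address: closure returning a result dict}
  while pc in handlers_by_addr:
    result = handlers_by_addr[pc](pc=pc)
    pc = result.get('new_pc', pc + 4)  # the branch handlers always set new_pc
  return pc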
+def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # If VCCZ is 1 then jump to a constant offset relative to the current PC.
+  # if VCCZ.u1 == 1'1U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  VCC = Reg(vcc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if VCCZ.u1 == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # If VCCZ is 0 then jump to a constant offset relative to the current PC.
+  # if VCCZ.u1 == 1'0U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  VCC = Reg(vcc)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if VCCZ.u1 == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if EXECZ.u1 == 1'1U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  EXEC = Reg(exec_mask)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  EXECZ = Reg(1 if EXEC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if EXECZ.u1 == 1:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # if EXECZ.u1 == 1'0U then
+  #   PC = PC + signext(SIMM16.i16 * 16'4) + 4LL
+  # else
+  #   PC = PC + 4LL
+  # endif
+  EXEC = Reg(exec_mask)
+  SIMM16 = Reg(literal)
+  PC = Reg(pc)
+  EXECZ = Reg(1 if EXEC._val == 0 else 0)
+  # --- compiled pseudocode ---
+  if EXECZ.u1 == 0:
+    PC = Reg(PC + signext(SIMM16.i16 * 4) + 4)
+  else:
+    PC = Reg(PC + 4)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
 SOPPOp_FUNCTIONS = {
   SOPPOp.S_NOP: _SOPPOp_S_NOP,
   SOPPOp.S_DELAY_ALU: _SOPPOp_S_DELAY_ALU,
   SOPPOp.S_TRAP: _SOPPOp_S_TRAP,
   SOPPOp.S_BARRIER_WAIT: _SOPPOp_S_BARRIER_WAIT,
+  SOPPOp.S_BRANCH: _SOPPOp_S_BRANCH,
+  SOPPOp.S_CBRANCH_SCC0: _SOPPOp_S_CBRANCH_SCC0,
+  SOPPOp.S_CBRANCH_SCC1: _SOPPOp_S_CBRANCH_SCC1,
+  SOPPOp.S_CBRANCH_VCCZ: _SOPPOp_S_CBRANCH_VCCZ,
+  SOPPOp.S_CBRANCH_VCCNZ: _SOPPOp_S_CBRANCH_VCCNZ,
+  SOPPOp.S_CBRANCH_EXECZ: _SOPPOp_S_CBRANCH_EXECZ,
+  SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ,
 }
 
-def _VOP1Op_V_MOV_B32(s0,
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -3404,7 +3635,7 @@ def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -3447,7 +3678,7 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3457,7 +3688,7 @@ def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -3468,7 +3699,7 @@ def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -3478,7 +3709,7 @@ def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3488,7 +3719,7 @@ def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3498,7 +3729,7 @@ def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3508,7 +3739,7 @@ def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3518,7 +3749,7 @@ def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -3528,7 +3759,7 @@ def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -3538,7 +3769,7 @@ def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -3548,7 +3779,7 @@ def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3558,7 +3789,7 @@ def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3569,7 +3800,7 @@ def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3579,7 +3810,7 @@ def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3589,7 +3820,7 @@ def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3599,7 +3830,7 @@ def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3609,7 +3840,7 @@ def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3619,7 +3850,7 @@ def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3630,7 +3861,7 @@ def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3641,7 +3872,7 @@ def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -3657,7 +3888,7 @@ def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -3673,7 +3904,7 @@ def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && 
(S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -3689,7 +3920,7 @@ def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b16 = S0.b16 S0 = Reg(s0) D0 = Reg(d0) @@ -3699,7 +3930,7 @@ def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3709,7 +3940,7 @@ def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3719,7 +3950,7 @@ def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -3734,7 +3965,7 @@ def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -3749,7 +3980,7 @@ def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -3764,7 +3995,7 @@ def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3774,7 +4005,7 @@ def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3784,7 +4015,7 @@ def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -3794,7 +4025,7 @@ def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -3805,7 +4036,7 @@ def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3815,7 +4046,7 @@ def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -3826,7 +4057,7 @@ def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3837,7 +4068,7 @@ def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3847,7 +4078,7 @@ def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3858,7 +4089,7 @@ def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3868,7 +4099,7 @@ def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3878,7 +4109,7 @@ def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -3888,7 +4119,7 @@ def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -3898,7 +4129,7 @@ def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -3918,7 +4149,7 @@ def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -3938,7 +4169,7 @@ def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -3958,7 +4189,7 @@ def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -3975,7 +4206,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} 
return result -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -3993,7 +4224,7 @@ def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -4004,7 +4235,7 @@ def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -4021,7 +4252,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -4038,7 +4269,7 @@ def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 @@ -4052,7 +4283,7 @@ def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -4062,7 +4293,7 @@ def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -4072,7 +4303,7 @@ def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4082,7 +4313,7 @@ def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4092,7 +4323,7 @@ def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -4102,7 +4333,7 @@ def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4112,7 +4343,7 @@ def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4122,7 +4353,7 @@ def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4132,7 +4363,7 @@ def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4142,7 +4373,7 @@ def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -4159,7 +4390,7 @@ def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): 
+def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -4176,7 +4407,7 @@ def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -4191,7 +4422,7 @@ def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -4206,7 +4437,7 @@ def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4216,7 +4447,7 @@ def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -4231,7 +4462,7 @@ def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + -floor(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4241,7 +4472,7 @@ def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -4251,7 +4482,7 @@ def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ 
-def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4261,11 +4492,7 @@ def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
-  # if n <= 16'0 then
-  # elsif n >= 16'255 then
-  # else
-  # endif);
+def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 16'0;
   # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16);
   # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16);
@@ -4274,12 +4501,6 @@ def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   D0 = Reg(d0)
   tmp = Reg(0)
   # --- compiled pseudocode ---
-  if n <= 0:
-    pass
-  elif n >= 255:
-    pass
-  else:
-    pass
   tmp = Reg(0)
   tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16)
   tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16)
@@ -4288,7 +4509,7 @@ def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
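The hunk above is the one substantive fix in this stretch of the file: the pseudocode compiler had emitted a stray `if n <= 0 / elif n >= 255` skeleton out of SAT8's own definition (referencing an undefined `n`), and it is deleted so only the real packing remains. What SAT8-plus-pack computes, as a plain-Python sketch (helper names here are illustrative, not from the patch):

def sat8(x: int) -> int:
    # clamp a signed 16-bit lane to the unsigned 8-bit range [0, 255]
    x = x - 0x10000 if x & 0x8000 else x
    return max(0, min(255, x))

def v_sat_pk_u8_i16(s0: int) -> int:
    # SAT8 of the low and high i16 halves, packed into the low 16 bits of D0
    return sat8(s0 & 0xffff) | (sat8((s0 >> 16) & 0xffff) << 8)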
-def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i16 = f16_to_snorm(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4298,7 +4519,7 @@ def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = f16_to_unorm(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4308,7 +4529,7 @@ def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = D0.b32;
   # D0.b32 = S0.b32;
   # S0.b32 = tmp
@@ -4323,7 +4544,7 @@ def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = D0.b16;
   # D0.b16 = S0.b16;
   # S0.b16 = tmp
@@ -4338,7 +4559,7 @@ def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = ~S0.u16
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4348,7 +4569,7 @@ def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i16))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4358,7 +4579,7 @@ def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0 = { 16'0, S0.u16 }
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4368,7 +4589,7 @@ def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if OPSEL[1 : 0].u2 == 2'0U then
   # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8)
   # elsif OPSEL[1 : 0].u2 == 2'2U then
@@ -4395,7 +4616,7 @@ def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if OPSEL[1 : 0].u2 == 2'0U then
   # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8)
   # elsif OPSEL[1 : 0].u2 == 2'2U then
@@ -4422,7 +4643,7 @@ def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0];
   # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8);
   # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8)
@@ -4438,7 +4659,7 @@ def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0];
   # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8);
   # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8)
@@ -4539,7 +4760,7 @@ VOP1Op_FUNCTIONS = {
   VOP1Op.V_CVT_PK_F32_BF8: _VOP1Op_V_CVT_PK_F32_BF8,
 }
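The packed FP8/BF8 converters read their source straight out of the lane's VGPR, with OPSEL[0] steering which halfword supplies the two 8-bit floats. The selection half of that pseudocode, sketched with the actual fp8 decode left abstract (the callable is a stand-in, not an API from this codebase):

def v_cvt_pk_f32_fp8(vgpr_val: int, opsel0: int, fp8_to_f32) -> tuple:
    # OPSEL[0].u1 picks the high or low halfword; its two fp8 bytes become two f32s
    tmp = (vgpr_val >> 16) & 0xffff if opsel0 else vgpr_val & 0xffff
    return fp8_to_f32(tmp & 0xff), fp8_to_f32((tmp >> 8) & 0xff)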
-def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4553,7 +4774,7 @@ def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
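V_CNDMASK_B32 is the wave-wide select: bit laneId of the 64-bit VCC mask picks S1 over S0. Reduced to plain integers (a sketch, not the emulator's calling convention):

def v_cndmask_b32(s0: int, s1: int, vcc: int, lane: int) -> int:
    # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32
    return s1 if (vcc >> lane) & 1 else s0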
-def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f64 = S0.f64 + S1.f64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4565,7 +4786,7 @@ def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result

-def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = S0.f32 + S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4576,7 +4797,7 @@ def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = S0.f32 - S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4587,7 +4808,7 @@ def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = S1.f32 - S0.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4598,7 +4819,7 @@ def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f64 = S0.f64 * S1.f64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4610,7 +4831,7 @@ def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result

-def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then
   # // DX9 rules, 0.0 * x = 0.0
   # D0.f32 = 0.0F
@@ -4629,7 +4850,7 @@ def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = S0.f32 * S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4640,7 +4861,7 @@ def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4651,7 +4872,7 @@ def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4662,7 +4883,7 @@ def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4673,7 +4894,7 @@ def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4684,7 +4905,7 @@ def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -4722,7 +4943,7 @@ def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result

-def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -4760,7 +4981,7 @@ def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result

-def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4771,7 +4992,7 @@ def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4782,7 +5003,7 @@ def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4793,7 +5014,7 @@ def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4804,7 +5025,7 @@ def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -4841,7 +5062,7 @@ def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -4878,7 +5099,7 @@ def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
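The *_NUM min/max family follows IEEE-754 minimumNumber/maximumNumber semantics: a NaN operand loses to a numeric one, and the isSignalNAN checks only raise TRAPSTS.INVALID. A simplified f32 sketch, assuming those semantics and ignoring the trap-status side effect and signed-zero ordering:

import math

def v_min_num_f32(a: float, b: float) -> float:
    # a single NaN operand is discarded; NaN results only when both are NaN
    if math.isnan(a): return b
    if math.isnan(b): return a
    return min(a, b)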
-def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S1.u32 << S0[4 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4889,7 +5110,7 @@ def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S1.u32 >> S0[4 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4900,7 +5121,7 @@ def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = (S1.i32 >> S0[4 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4911,7 +5132,7 @@ def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 & S1.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4922,7 +5143,7 @@ def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 | S1.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4933,7 +5154,7 @@ def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 ^ S1.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4944,7 +5165,7 @@ def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ~(S0.u32 ^ S1.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4955,7 +5176,7 @@ def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S1.u64 << S0[5 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -4967,7 +5188,7 @@ def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result

-def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64;
   # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32.
@@ -4987,7 +5208,7 @@ def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
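The carry chain in V_ADD_CO_CI_U32 is plain 33-bit arithmetic: the widened sum writes its low 32 bits to D0 and sets the lane's VCC bit on overflow. Sketch:

def v_add_co_ci_u32(s0: int, s1: int, carry_in: int) -> tuple[int, int]:
    tmp = s0 + s1 + carry_in                          # at most 33 bits wide
    return tmp & 0xffffffff, int(tmp >= 0x100000000)  # (D0, carry-out bit for VCC[lane])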
-def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32;
   # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32.
@@ -5007,7 +5228,7 @@ def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result

-def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32;
   # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32.
@@ -5027,7 +5248,7 @@ def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result

-def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S0.u32 + S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5038,7 +5259,7 @@ def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S0.u32 - S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5049,7 +5270,7 @@ def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S1.u32 - S0.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5060,7 +5281,7 @@ def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = fma(S0.f32, S1.f32, D0.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5071,7 +5292,7 @@ def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5083,7 +5304,7 @@ def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5095,7 +5316,7 @@ def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
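FMAMK and FMAAK route the 32-bit instruction-stream literal (SIMM32) into the FMA as the multiplier or the addend, respectively. A sketch of just the operand routing (Python floats, so the multiply-add below is not fused with the single rounding that the hardware fma performs):

def v_fmamk_f32(s0: float, s1: float, simm32: float) -> float:
    return s0 * simm32 + s1  # literal in the multiplier slot

def v_fmaak_f32(s0: float, s1: float, simm32: float) -> float:
    return s0 * s1 + simm32  # literal in the addend slot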
-def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # prev_mode = ROUND_MODE;
   # tmp[15 : 0].f16 = f32_to_f16(S0.f32);
   # tmp[31 : 16].f16 = f32_to_f16(S1.f32);
@@ -5110,7 +5331,7 @@ def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite
   result = {'d0': d0, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -5147,7 +5368,7 @@ def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -5184,7 +5405,7 @@ def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 + S1.f16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5195,7 +5416,7 @@ def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 - S1.f16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5206,7 +5427,7 @@ def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S1.f16 - S0.f16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5217,7 +5438,7 @@ def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 * S1.f16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5228,7 +5449,7 @@ def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = fma(S0.f16, S1.f16, D0.f16)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5239,7 +5460,7 @@ def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5251,7 +5472,7 @@ def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5263,7 +5484,7 @@ def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5274,7 +5495,7 @@ def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16);
   # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16)
   S0 = Reg(s0)
@@ -5339,7 +5560,7 @@ VOP2Op_FUNCTIONS = {
   VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16,
 }

-def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f16 < S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5348,15 +5569,18 @@ def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 < S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
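From here on, every generated VOP3 handler also threads the program counter through: the wrapper passes `pc` in, and the result dict carries `new_pc` back out so pseudocode that writes PC can redirect execution. The compare ops never write PC, so `new_pc` is just the incoming address echoed back; the added pair of lines is a 64-bit unsigned-to-signed reinterpretation, which standalone is:

def to_signed64(pc_val: int) -> int:
    # reinterpret a 64-bit register value as two's-complement signed
    return pc_val - 0x10000000000000000 if pc_val >= 0x8000000000000000 else pc_val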
-def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f16 == S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5365,15 +5589,18 @@ def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 == S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 <= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5381,15 +5608,18 @@ def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 <= S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.f16 > S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5398,15 +5628,18 @@ def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 > S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 <> S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5414,15 +5647,18 @@ def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 != S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 >= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5430,15 +5666,18 @@ def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 >= S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC
   # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
@@ -5447,15 +5686,18 @@ def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = ( not isNAN(F(S0.f16)) and  not isNAN(F(S1.f16)))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)));
   # // D0 = VCC in VOPC encoding.
@@ -5464,15 +5706,18 @@ def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f16 >= S1.f16);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -5481,15 +5726,18 @@ def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 >= S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f16 <> S1.f16);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -5498,15 +5746,18 @@ def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 != S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = !(S0.f16 > S1.f16);
   # // With NAN inputs this is not the same operation as <=
@@ -5516,15 +5767,18 @@ def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f16 > S1.f16)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
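The `// With NAN inputs ...` comments are the whole point of the negated compare family: every ordered comparison is false when a NaN is involved, so !(a >= b) is not the same predicate as a < b. A quick self-contained check:

import math

nan = math.nan
assert (nan < 1.0) is False        # ordered compare against NaN is false (V_CMP_LT)
assert (not (nan >= 1.0)) is True  # but V_CMP_NGE is true for the same inputs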
@@ -5533,15 +5787,18 @@ def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -5551,15 +5808,18 @@ def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -5569,15 +5829,18 @@ def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. 
@@ -5586,15 +5849,18 @@ def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -5603,15 +5869,18 @@ def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5619,15 +5888,18 @@ def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. 
@@ -5636,15 +5908,18 @@ def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5652,15 +5927,18 @@ def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5668,15 +5946,18 @@ def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -5685,15 +5966,18 @@ def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -5702,15 +5986,18 @@ def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5719,15 +6006,18 @@ def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5736,15 +6026,18 @@ def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -5754,15 +6047,18 @@ def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5771,15 +6067,18 @@ def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -5789,15 +6088,18 @@ def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -5807,15 +6109,18 @@ def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -5824,15 +6129,18 @@ def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -5841,15 +6149,18 @@ def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5857,15 +6168,18 @@ def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -5874,15 +6188,18 @@ def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5890,15 +6207,18 @@ def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5906,15 +6226,18 @@ def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. 
Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -5923,15 +6246,18 @@ def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -5940,15 +6266,18 @@ def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5957,15 +6286,18 @@ def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
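# V_CMP_O ("ordered") and V_CMP_U ("unordered") above reduce to pure NaN
# tests, and are exact complements of each other. Equivalent plain-Python
# behaviour, for reference:
import math

def v_cmp_o(a: float, b: float) -> bool:
    # ordered: true iff neither operand is NaN
    return not (math.isnan(a) or math.isnan(b))

def v_cmp_u(a: float, b: float) -> bool:
    # unordered: true iff at least one operand is NaN
    return math.isnan(a) or math.isnan(b)

assert v_cmp_o(1.0, 2.0) and not v_cmp_u(1.0, 2.0)
assert v_cmp_u(float("nan"), 2.0) and not v_cmp_o(float("nan"), 2.0)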
@@ -5974,15 +6306,18 @@ def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -5992,15 +6327,18 @@ def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -6009,15 +6347,18 @@ def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -6027,15 +6368,18 @@ def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -6045,15 +6389,18 @@ def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -6062,15 +6409,18 @@ def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
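# The "With NAN inputs this is not the same operation as ..." comments above
# are the reason the generator keeps the explicit `not (...)` form instead of
# folding V_CMP_NGE into V_CMP_LT and so on: IEEE 754 comparisons with a NaN
# operand are all false, so the negated forms flip to true. A quick check:
nan = float("nan")
assert (not (nan >= 1.0)) is True      # NGE: true on a NaN input
assert (nan < 1.0) is False            # LT: false on a NaN input, not equivalent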
@@ -6079,15 +6429,18 @@ def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6095,15 +6448,18 @@ def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -6112,15 +6468,18 @@ def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -6129,15 +6488,18 @@ def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6145,15 +6507,18 @@ def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -6162,15 +6527,18 @@ def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
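# The i16 compares above differ from the u16 ones only in how the raw 16 bits
# are read back: .i16 must sign-extend before comparing. A standalone sketch
# of that reinterpretation (helper name illustrative, not from pcode.py):
def as_i16(bits: int) -> int:
    bits &= 0xFFFF
    return bits - 0x10000 if bits & 0x8000 else bits

assert as_i16(0xFFFF) == -1             # -1 signed, but 65535 unsigned
assert as_i16(0xFFFF) < as_i16(0x0001)  # the i16 view orders them the other way
assert not (0xFFFF < 0x0001)            # the u16 view compares raw bits directly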
@@ -6179,15 +6547,18 @@ def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6195,15 +6566,18 @@ def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -6212,15 +6586,18 @@ def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -6229,15 +6606,18 @@ def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6245,15 +6625,18 @@ def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -6262,15 +6645,18 @@ def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -6279,15 +6665,18 @@ def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6295,15 +6684,18 @@ def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -6312,15 +6704,18 @@ def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -6329,15 +6724,18 @@ def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6345,15 +6743,18 @@ def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. @@ -6362,15 +6763,18 @@ def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. 
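# Every compare in this family writes one bit of a wave-wide 64-bit mask:
# D0.u64[laneId] = cond. In plain Python that bit-slice assignment amounts to
# the following (a sketch; the actual mechanism is the Reg slicing machinery
# in pcode.py, not reproduced here):
def set_lane_bit(mask: int, lane: int, cond: bool) -> int:
    # set or clear bit `lane` of a 64-bit mask without touching other lanes
    return (mask | (1 << lane)) if cond else (mask & ~(1 << lane) & 0xFFFFFFFFFFFFFFFF)

assert set_lane_bit(0, 3, True) == 0b1000
assert set_lane_bit(0b1111, 1, False) == 0b1101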
@@ -6379,15 +6783,18 @@ def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6395,15 +6802,18 @@ def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. @@ -6412,15 +6822,18 @@ def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. 
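# The pseudocode's `<>` operator compiles to Python `!=` here. For the integer
# compares that is exact: "less than or greater than" and "not equal" coincide.
# For the float V_CMP_LG/NLG variants earlier in this hunk the two only differ
# when a NaN is involved (`<>` is unordered-false, `!=` is unordered-true),
# which is what the "not the same operation as ==" comments are warning about:
assert (3 != 5) == (3 < 5 or 3 > 5)                   # ints: <> and != agree
nan = float("nan")
assert (nan != nan) and not (nan < nan or nan > nan)  # floats: diverge on NaN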
@@ -6429,15 +6842,18 @@ def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6445,15 +6861,18 @@ def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -6462,15 +6881,18 @@ def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -6479,15 +6901,18 @@ def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6495,15 +6920,18 @@ def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -6512,15 +6940,18 @@ def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. 
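# These handlers are per-lane: `lane` selects which bit of D0/VCC/EXEC the
# result lands in. A hedged sketch of how a wave-level caller might drive one
# of them across the EXEC mask (the loop shape and operand values are
# illustrative; the real loop lives in emu.py, outside this hunk):
s0, s1, exec_mask = 3, 7, 0xF
d0 = 0
for lane in range(64):
    if not (exec_mask >> lane) & 1:
        continue                        # inactive lanes keep their old bit
    res = _VOP3Op_V_CMP_LE_I64(s0, s1, 0, d0, 0, 0, lane, exec_mask, 0, None, {})
    d0 = res['d0']                      # carries the accumulated lane bits forward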
@@ -6529,15 +6960,18 @@ def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6545,15 +6979,18 @@ def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. @@ -6562,15 +6999,18 @@ def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -6579,15 +7019,18 @@ def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6595,15 +7038,18 @@ def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. @@ -6612,15 +7058,18 @@ def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -6629,15 +7078,18 @@ def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6645,15 +7097,18 @@ def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -6690,6 +7145,7 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -6708,9 +7164,11 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
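# V_CMP_CLASS_* below is a class test, not an ordering test: S1 is a bitmask
# selecting which float classes make the compare true (the comment block lists
# bit 0 = signaling NaN, bit 1 = quiet NaN, and so on). A partial sketch of
# the two NaN bits for the f32 variant, using the IEEE 754 convention that a
# quiet NaN has the top mantissa bit set (helper name illustrative):
import math, struct

def cmp_class_f32_nan_bits(x: float, s1_mask: int) -> bool:
    if not math.isnan(x):
        return False                    # non-NaN class bits elided in this sketch
    bits = struct.unpack("<I", struct.pack("<f", x))[0]
    quiet = (bits >> 22) & 1            # mantissa MSB set => quiet NaN
    return bool((s1_mask >> (1 if quiet else 0)) & 1)

assert cmp_class_f32_nan_bits(float("nan"), 0b10)   # Python's nan is quiet
assert not cmp_class_f32_nan_bits(1.0, 0b11)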
@@ -6747,6 +7205,7 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -6765,9 +7224,11 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -6804,6 +7265,7 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -6822,9 +7284,11 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6837,7 +7301,7 @@ def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
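# From here on, the V_CMPX_* variants repeat the same compares but write the
# per-lane result into EXEC instead of VCC/D0, so failing lanes are masked off
# directly; their result dict reports 'exec' only when the mask actually
# changed, mirroring the 'vcc_lane' guard above. Note these hunks only extend
# the signature with pc=0 for uniformity; no PC value is read or reported.
# The equivalent mask update in plain Python (a sketch):
def apply_cmpx(exec_mask: int, lane: int, cond: bool) -> int:
    # EXEC.u64[laneId] = cond
    return (exec_mask | (1 << lane)) if cond else (exec_mask & ~(1 << lane) & 0xFFFFFFFFFFFFFFFF)

assert apply_cmpx(0xF, 2, False) == 0xB
assert apply_cmpx(0x0, 5, True) == 0x20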
Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 S0 = Reg(s0) @@ -6851,7 +7315,7 @@ def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6864,7 +7328,7 @@ def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6877,7 +7341,7 @@ def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6890,7 +7354,7 @@ def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6903,7 +7367,7 @@ def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -6916,7 +7380,7 @@ def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -6929,7 +7393,7 @@ def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -6943,7 +7407,7 @@ def 
_VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -6957,7 +7421,7 @@ def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -6971,7 +7435,7 @@ def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -6985,7 +7449,7 @@ def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -6999,7 +7463,7 @@ def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7013,7 +7477,7 @@ def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7026,7 +7490,7 @@ def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal 
to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 S0 = Reg(s0) @@ -7040,7 +7504,7 @@ def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7053,7 +7517,7 @@ def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7066,7 +7530,7 @@ def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7079,7 +7543,7 @@ def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7092,7 +7556,7 @@ def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -7105,7 +7569,7 @@ def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -7118,7 +7582,7 @@ def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -7132,7 +7596,7 @@ 
def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -7146,7 +7610,7 @@ def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -7160,7 +7624,7 @@ def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -7174,7 +7638,7 @@ def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -7188,7 +7652,7 @@ def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7202,7 +7666,7 @@ def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 < S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7215,7 +7679,7 @@ def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is 
equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 S0 = Reg(s0) @@ -7229,7 +7693,7 @@ def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7242,7 +7706,7 @@ def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 > S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7255,7 +7719,7 @@ def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <> S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7268,7 +7732,7 @@ def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 >= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7281,7 +7745,7 @@ def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -7294,7 +7758,7 @@ def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -7307,7 +7771,7 @@ def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -7321,7 +7785,7 @@ def 
_VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -7335,7 +7799,7 @@ def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -7349,7 +7813,7 @@ def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -7363,7 +7827,7 @@ def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -7377,7 +7841,7 @@ def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7391,7 +7855,7 @@ def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 < S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7404,7 +7868,7 @@ def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal 
to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 S0 = Reg(s0) @@ -7418,7 +7882,7 @@ def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7431,7 +7895,7 @@ def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 > S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7444,7 +7908,7 @@ def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <> S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7457,7 +7921,7 @@ def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 >= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7470,7 +7934,7 @@ def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 < S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7483,7 +7947,7 @@ def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 S0 = Reg(s0) @@ -7497,7 +7961,7 @@ def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7510,7 +7974,7 @@ def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 > S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7523,7 +7987,7 @@ def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <> S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7536,7 +8000,7 @@ def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 >= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7549,7 +8013,7 @@ def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7562,7 +8026,7 @@ def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 S0 = Reg(s0) @@ -7576,7 +8040,7 @@ def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7589,7 +8053,7 @@ def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7602,7 +8066,7 @@ def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7615,7 +8079,7 @@ def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7628,7 +8092,7 @@ def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7641,7 +8105,7 @@ def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 S0 = Reg(s0) @@ -7655,7 +8119,7 @@ def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7668,7 +8132,7 @@ def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7681,7 +8145,7 @@ def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7694,7 +8158,7 @@ def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7707,7 +8171,7 @@ def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 < S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7720,7 +8184,7 @@ def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 S0 = Reg(s0) @@ -7734,7 +8198,7 @@ def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7747,7 +8211,7 @@ def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 > S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7760,7 +8224,7 @@ def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <> S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7773,7 +8237,7 @@ def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 >= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7786,7 +8250,7 @@ def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 < S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7799,7 +8263,7 @@ def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 S0 = Reg(s0) @@ -7813,7 +8277,7 @@ def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7826,7 +8290,7 @@ def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 > S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7839,7 +8303,7 @@ def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7852,7 +8316,7 @@ def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 >= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7865,7 +8329,7 @@ def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -7918,7 +8382,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -7971,7 +8435,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. 
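The V_CMPX_CLASS_* ops above test S0 against a bitmask of float classes carried in S1 and write the per-lane result to EXEC, like the other CMPX ops. A minimal standalone sketch of that classify-and-test step (illustration only: classify_f32 and cmp_class_f32 are hypothetical names, not the emu.py API, and a Python float cannot distinguish a signaling NaN from a quiet one):

import math

def classify_f32(x: float) -> int:
    # Class indices follow the S1.u[] comment above: 0 = signaling NaN,
    # 1 = quiet NaN, 2 = -inf, 3/4/5 = negative normal/denormal/zero,
    # 6/7/8 = positive zero/denormal/normal, 9 = +inf.
    if math.isnan(x): return 1                        # all Python NaNs report as quiet
    if math.isinf(x): return 2 if x < 0 else 9
    if x == 0.0: return 5 if math.copysign(1.0, x) < 0 else 6
    if abs(x) < 2.0**-126: return 4 if x < 0 else 7   # f32 denormal range
    return 3 if x < 0 else 8

def cmp_class_f32(s0: float, s1_mask: int) -> bool:
    # One lane of V_CMPX_CLASS_F32: true iff S0's class bit is set in S1.
    return bool((s1_mask >> classify_f32(s0)) & 1)

assert cmp_class_f32(float('nan'), 1 << 1)   # quiet-NaN bit
assert cmp_class_f32(-0.0, 1 << 5)           # negative-zero bit
assert not cmp_class_f32(1.0, 1 << 2)        # 1.0 is not -inf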
@@ -8024,7 +8488,7 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -8034,7 +8498,7 @@ def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -8077,7 +8541,7 @@ def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8087,7 +8551,7 @@ def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -8098,7 +8562,7 @@ def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -8108,7 +8572,7 @@ def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8118,7 +8582,7 @@ def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8128,7 +8592,7 @@ def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8138,7 +8602,7 @@ def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8148,7 +8612,7 @@ def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8158,7 +8622,7 @@ def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -8168,7 +8632,7 @@ def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -8178,7 +8642,7 @@ def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8188,7 +8652,7 @@ def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8199,7 +8663,7 @@ def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8209,7 +8673,7 @@ def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8219,7 +8683,7 @@ def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8229,7 +8693,7 @@ def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8239,7 +8703,7 @@ def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8249,7 +8713,7 @@ def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8260,7 +8724,7 @@ def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8271,7 +8735,7 @@ def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -8287,7 +8751,7 @@ def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -8303,7 +8767,7 @@ def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR 
result['d0_64'] = True return result -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -8319,7 +8783,7 @@ def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b16 = S0.b16 S0 = Reg(s0) D0 = Reg(d0) @@ -8329,7 +8793,7 @@ def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8339,7 +8803,7 @@ def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8349,7 +8813,7 @@ def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -8364,7 +8828,7 @@ def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -8379,7 +8843,7 @@ def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -8394,7 +8858,7 @@ def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): 
# D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8404,7 +8868,7 @@ def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8414,7 +8878,7 @@ def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -8424,7 +8888,7 @@ def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -8435,7 +8899,7 @@ def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8445,7 +8909,7 @@ def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -8456,7 +8920,7 @@ def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8467,7 +8931,7 @@ def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8477,7 +8941,7 @@ def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # 
D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8488,7 +8952,7 @@ def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8498,7 +8962,7 @@ def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8508,7 +8972,7 @@ def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -8518,7 +8982,7 @@ def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -8528,7 +8992,7 @@ def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -8548,7 +9012,7 @@ def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -8568,7 +9032,7 @@ def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -8588,7 +9052,7 @@ def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then
   # D0.i32 = 0
   # else
@@ -8605,7 +9069,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then
   # D0.f64 = S0.f64
   # else
@@ -8623,7 +9087,7 @@ def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['d0_64'] = True
   return result

-def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f64 = S0.f64 + -floor(S0.f64)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8634,7 +9098,7 @@ def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result['d0_64'] = True
   return result

-def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then
   # D0.i32 = 0
   # else
@@ -8651,7 +9115,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then
   # D0.f32 = S0.f32
   # else
@@ -8668,7 +9132,7 @@ def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # addr = SRC0.u32;
   # // Raw value from instruction
   # D0.b32 = VGPR[laneId][addr].b32
@@ -8682,7 +9146,7 @@ def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = u16_to_f16(S0.u16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8692,7 +9156,7 @@ def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = i16_to_f16(S0.i16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8702,7 +9166,7 @@ def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = f16_to_u16(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8712,7 +9176,7 @@ def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i16 = f16_to_i16(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8722,7 +9186,7 @@ def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = 16'1.0 / S0.f16
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8732,7 +9196,7 @@ def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = sqrt(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8742,7 +9206,7 @@ def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = 16'1.0 / sqrt(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8752,7 +9216,7 @@ def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = log2(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8762,7 +9226,7 @@ def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = pow(16'2.0, S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8772,7 +9236,7 @@ def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then
   # D0.f16 = S0.f16
   # else
@@ -8789,7 +9253,7 @@ def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then
   # D0.i16 = 16'0
   # else
@@ -8806,7 +9270,7 @@ def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16);
   # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then
   # D0.f16 += -16'1.0
@@ -8821,7 +9285,7 @@ def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16);
   # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then
   # D0.f16 += 16'1.0
@@ -8836,7 +9300,7 @@ def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8846,7 +9310,7 @@ def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = floor(S0.f16 + 16'0.5);
   # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then
   # D0.f16 -= 16'1.0
@@ -8861,7 +9325,7 @@ def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 + -floor(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8871,7 +9335,7 @@ def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8881,7 +9345,7 @@ def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8891,11 +9355,7 @@ def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
-  # if n <= 16'0 then
-  # elsif n >= 16'255 then
-  # else
-  # endif);
+def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 16'0;
   # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16);
   # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16);
@@ -8904,12 +9364,6 @@ def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   D0 = Reg(d0)
   tmp = Reg(0)
   # --- compiled pseudocode ---
-  if n <= 0:
-    pass
-  elif n >= 255:
-    pass
-  else:
-    pass
   tmp = Reg(0)
   tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16)
   tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16)
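The two hunks above drop pseudocode the compiler had emitted as unreachable scaffolding (the loop variable n was never bound) and keep only the SAT8 packing. A minimal sketch of what that packing computes, assuming SAT8 clamps a signed 16-bit value into [0, 255]; sat8 and sat_pk_u8_i16 are hypothetical helpers written for illustration, not the emulator's implementation:

def sat8(x):
  # SAT8: clamp a signed value into the unsigned byte range [0, 255]
  return 0 if x <= 0 else 255 if x >= 255 else x

def sat_pk_u8_i16(s0):
  lo, hi = s0 & 0xffff, (s0 >> 16) & 0xffff
  lo = lo - 0x10000 if lo & 0x8000 else lo  # reinterpret as i16
  hi = hi - 0x10000 if hi & 0x8000 else hi
  return sat8(lo) | (sat8(hi) << 8)

assert sat_pk_u8_i16(0x7fff0040) == 0xff40  # 32767 saturates to 255, 64 passes through
assert sat_pk_u8_i16(0x80000100) == 0x00ff  # -32768 clamps to 0, 256 clamps to 255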
@@ -8918,7 +9372,7 @@ def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i16 = f16_to_snorm(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8928,7 +9382,7 @@ def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = f16_to_unorm(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8938,7 +9392,7 @@ def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = ~S0.u16
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8948,7 +9402,7 @@ def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i16))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8958,7 +9412,7 @@ def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0 = { 16'0, S0.u16 }
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -8968,7 +9422,7 @@ def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if OPSEL[1 : 0].u2 == 2'0U then
   # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8)
   # elsif OPSEL[1 : 0].u2 == 2'2U then
@@ -8995,7 +9449,7 @@ def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if OPSEL[1 : 0].u2 == 2'0U then
   # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8)
   # elsif OPSEL[1 : 0].u2 == 2'2U then
@@ -9022,7 +9476,7 @@ def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0];
   # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8);
   # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8)
@@ -9038,7 +9492,7 @@ def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0];
   # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8);
   # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8)
@@ -9054,7 +9508,7 @@ def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9068,7 +9522,7 @@ def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result

-def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f64 = S0.f64 + S1.f64
   S0 = Reg(s0)
   S1 = Reg(s1)
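For reference, the VCC-based select that V_CNDMASK_B32 performs reduces to a single bit test per lane; a standalone sketch (cndmask_b32 is a hypothetical helper, not the emulator's code):

def cndmask_b32(s0, s1, vcc, lane):
  # bit `lane` of the 64-bit VCC mask chooses S1 over S0 for that lane
  return s1 if (vcc >> lane) & 1 else s0

assert cndmask_b32(0xaaaa, 0xbbbb, 0b0100, lane=2) == 0xbbbb
assert cndmask_b32(0xaaaa, 0xbbbb, 0b0100, lane=0) == 0xaaaa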
@@ -9080,7 +9534,7 @@ def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result

-def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = S0.f32 + S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9091,7 +9545,7 @@ def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = S0.f32 - S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9102,7 +9556,7 @@ def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = S1.f32 - S0.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9113,7 +9567,7 @@ def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f64 = S0.f64 * S1.f64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9125,7 +9579,7 @@ def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result

-def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then
   # // DX9 rules, 0.0 * x = 0.0
   # D0.f32 = 0.0F
@@ -9144,7 +9598,7 @@ def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = S0.f32 * S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9155,7 +9609,7 @@ def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9166,7 +9620,7 @@ def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9177,7 +9631,7 @@ def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9188,7 +9642,7 @@ def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9199,7 +9653,7 @@ def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -9237,7 +9691,7 @@ def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result

-def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -9275,7 +9729,7 @@ def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result

-def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9286,7 +9740,7 @@ def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9297,7 +9751,7 @@ def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9308,7 +9762,7 @@ def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9319,7 +9773,7 @@ def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -9356,7 +9810,7 @@ def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -9393,7 +9847,7 @@ def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S1.u32 << S0[4 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9404,7 +9858,7 @@ def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S1.u32 >> S0[4 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9415,7 +9869,7 @@ def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = (S1.i32 >> S0[4 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9426,7 +9880,7 @@ def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 & S1.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9437,7 +9891,7 @@ def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 | S1.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9448,7 +9902,7 @@ def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 ^ S1.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9459,7 +9913,7 @@ def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ~(S0.u32 ^ S1.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9470,7 +9924,7 @@ def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S1.u64 << S0[5 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9482,7 +9936,7 @@ def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result

-def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S0.u32 + S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9493,7 +9947,7 @@ def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S0.u32 - S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9504,7 +9958,7 @@ def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = S1.u32 - S0.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9515,7 +9969,7 @@ def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = fma(S0.f32, S1.f32, D0.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9526,7 +9980,7 @@ def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # prev_mode = ROUND_MODE;
   # tmp[15 : 0].f16 = f32_to_f16(S0.f32);
   # tmp[31 : 16].f16 = f32_to_f16(S1.f32);
@@ -9541,7 +9995,7 @@ def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite
   result = {'d0': d0, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -9578,7 +10032,7 @@ def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then
   # TRAPSTS.INVALID = 1
   # endif;
@@ -9615,7 +10069,7 @@ def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 + S1.f16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9626,7 +10080,7 @@ def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 - S1.f16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9637,7 +10091,7 @@ def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S1.f16 - S0.f16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9648,7 +10102,7 @@ def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 * S1.f16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9659,7 +10113,7 @@ def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = fma(S0.f16, S1.f16, D0.f16)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9670,7 +10124,7 @@ def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9681,7 +10135,7 @@ def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then
   # // DX9 rules, 0.0 * x = 0.0
   # D0.f32 = S2.f32
@@ -9701,7 +10155,7 @@ def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9713,7 +10167,7 @@ def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9725,7 +10179,7 @@ def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}).
   # // XYZ coordinate is given in (S0.f, S1.f, S2.f).
   # // S0.f = x
@@ -9774,7 +10228,7 @@ def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // D0.f = cubemap S coordinate.
   # // XYZ coordinate is given in (S0.f, S1.f, S2.f).
   # // S0.f = x
@@ -9816,7 +10270,7 @@ def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // D0.f = cubemap T coordinate.
   # // XYZ coordinate is given in (S0.f, S1.f, S2.f).
   # // S0.f = x
@@ -9851,7 +10305,7 @@ def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // D0.f = 2.0 * cubemap major axis.
   # // XYZ coordinate is given in (S0.f, S1.f, S2.f).
   # // S0.f = x
@@ -9879,7 +10333,7 @@ def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9891,7 +10345,7 @@ def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1));
   # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32)
   S0 = Reg(s0)
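The signext_from_bit step is the only difference between V_BFE_I32 and V_BFE_U32: the top bit of the extracted field is re-interpreted as a sign bit. A sketch, assuming a field width in 1..31; bfe_i32 is a hypothetical helper, not the emulator's code:

def bfe_i32(s0, offset, width):
  tmp = (s0 >> (offset & 31)) & ((1 << (width & 31)) - 1)
  sign = 1 << ((width & 31) - 1)  # signext_from_bit: bit width-1 is the sign
  return (tmp ^ sign) - sign

assert bfe_i32(0x00000f00, 8, 4) == -1  # field 0b1111 sign-extends to -1
assert bfe_i32(0x00000700, 8, 4) == 7   # field 0b0111 stays positive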
@@ -9906,7 +10360,7 @@ def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9918,7 +10372,7 @@ def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = fma(S0.f32, S1.f32, S2.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9930,7 +10384,7 @@ def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f64 = fma(S0.f64, S1.f64, S2.f64)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9943,7 +10397,7 @@ def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result

-def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U);
   # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U);
   # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U);
@@ -9964,7 +10418,7 @@ def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -9976,7 +10430,7 @@ def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL)
   S0 = Reg(s0)
   S1 = Reg(s1)
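Both align ops above are funnel shifts out of the 64-bit concatenation { S0.u32, S1.u32 } with S0 in the high dword: V_ALIGNBIT_B32 shifts by S2[4:0] bits and V_ALIGNBYTE_B32 by S2[1:0] bytes. A sketch (alignbit_b32 and alignbyte_b32 are hypothetical helpers, not the emulator's code):

def alignbit_b32(s0, s1, s2):
  # low 32 bits of { s0, s1 } shifted right by s2[4:0] bits
  return (((s0 << 32) | s1) >> (s2 & 31)) & 0xffffffff

def alignbyte_b32(s0, s1, s2):
  # same, but the shift amount is s2[1:0] whole bytes
  return (((s0 << 32) | s1) >> ((s2 & 3) * 8)) & 0xffffffff

assert alignbit_b32(0x1, 0x80000000, 1) == 0xc0000000
assert alignbyte_b32(0x11223344, 0xaabbccdd, 1) == 0x44aabbcc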
@@ -9988,7 +10442,7 @@ def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) ||
   # isNAN(64'F(S2.f32))) then
   # D0.f32 = -MAX_FLOAT_F32
@@ -10008,7 +10462,7 @@ def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10020,7 +10474,7 @@ def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10032,7 +10486,7 @@ def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10044,7 +10498,7 @@ def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10056,7 +10510,7 @@ def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then
   # D0.i32 = v_max_i32(S1.i32, S2.i32)
   # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then
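The V_MED3 pseudocode computes a median of three by excluding whichever operand equals the maximum and taking the max of the other two. A direct transcription of that branch structure (med3_i32 is a hypothetical helper, not the emulator's code):

def med3_i32(s0, s1, s2):
  # whichever operand is the maximum is excluded; the max of the rest is the median
  if max(s0, s1, s2) == s0: return max(s1, s2)
  if max(s0, s1, s2) == s1: return max(s0, s2)
  return max(s0, s1)

assert med3_i32(3, 1, 2) == 2
assert med3_i32(5, -7, 0) == sorted([5, -7, 0])[1] == 0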
@@ -10079,7 +10533,7 @@ def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then
   # D0.u32 = v_max_u32(S1.u32, S2.u32)
   # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then
@@ -10102,7 +10556,7 @@ def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // UNSIGNED comparison
   # tmp = S2.u32;
   # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0]));
@@ -10126,7 +10580,7 @@ def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10138,7 +10592,7 @@ def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // UNSIGNED comparison
   # tmp = S2.u32;
   # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16);
@@ -10158,7 +10612,7 @@ def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // UNSIGNED comparison
   # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32
   S0 = Reg(s0)
@@ -10171,7 +10625,7 @@ def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U))));
   # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U)));
   # D0.u32 = tmp
@@ -10188,7 +10642,7 @@ def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # sign_out = (sign(S1.f32) ^ sign(S2.f32));
   # if isNAN(64'F(S2.f32)) then
   # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32)))
@@ -10241,7 +10695,7 @@ def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # sign_out = (sign(S1.f64) ^ sign(S2.f64));
   # if isNAN(S2.f64) then
   # D0.f64 = cvtToQuietNAN(S2.f64)
@@ -10295,7 +10749,7 @@ def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['d0_64'] = True
   return result

-def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = v_min_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10307,7 +10761,7 @@ def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = v_max_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10319,7 +10773,7 @@ def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = v_min_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10331,7 +10785,7 @@ def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = v_max_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10343,7 +10797,7 @@ def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10355,7 +10809,7 @@ def _VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10367,7 +10821,7 @@ def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = v_minimum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10379,7 +10833,7 @@ def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = v_maximum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10391,7 +10845,7 @@ def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then
   # D0.f32 = v_min3_num_f32(S0.f32, S1.f32, S2.f32)
   # elsif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then
@@ -10418,7 +10872,7 @@ def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then
   # D0.f16 = v_min3_num_f16(S0.f16, S1.f16, S2.f16)
   # elsif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then
@@ -10445,7 +10899,7 @@ def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if VCC.u64[laneId] then
   # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32)
   # else
@@ -10467,7 +10921,7 @@ def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result

-def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if VCC.u64[laneId] then
   # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64)
   # else
@@ -10490,7 +10944,7 @@ def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['d0_64'] = True
   return result

-def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // UNSIGNED comparison
   # tmp = S2.u32;
   # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0]));
@@ -10514,7 +10968,7 @@ def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32));
   # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32));
   # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32));
@@ -10536,7 +10990,7 @@ def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['d0_64'] = True
   return result

-def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32));
   # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32));
   # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32));
@@ -10558,7 +11012,7 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result['d0_64'] = True
   return result

-def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32));
   # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32));
   # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32));
@@ -10579,7 +11033,7 @@ def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10591,7 +11045,7 @@ def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = S0.u16 * S1.u16 + S2.u16
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -10603,7 +11057,25 @@ def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]);
+  # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]);
+  # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]);
+  # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0])
+  S0 = Reg(s0)
+  S1 = Reg(s1)
+  S2 = Reg(s2)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24])
+  D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16])
+  D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8])
+  D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0])
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
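The new _VOP3Op_V_PERM_B32 above is the v_perm_b32 fix this patch is named for: each selector byte in S2 picks one byte out of the 64-bit concatenation of S0 (high dword) and S1 (low dword), matching the { S0.u32, S1.u32 } packing in the pseudocode. A sketch of the byte-select core, assuming selectors in 0..7 (the ISA's constant and sign-replicate selectors 8..15 are omitted); byte_permute and perm_b32 are hypothetical helpers, not the emulator's BYTE_PERMUTE/_pack32:

def byte_permute(data, sel):
  # select byte `sel` of the 64-bit data; sel 0 is the least-significant byte of S1
  assert sel <= 7, "constant/sign-replicate selectors not modeled in this sketch"
  return (data >> (8 * sel)) & 0xff

def perm_b32(s0, s1, s2):
  data = (s0 << 32) | s1  # { S0.u32, S1.u32 }
  out = 0
  for i in range(4):  # one selector byte per destination byte
    out |= byte_permute(data, (s2 >> (8 * i)) & 0xff) << (8 * i)
  return out

# selector 0x03020001 swaps the two low bytes of S1
assert perm_b32(0, 0x11223344, 0x03020001) == 0x11224433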
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -10675,7 +11147,7 @@ def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -10687,7 +11159,7 @@ def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -10699,7 +11171,7 @@ def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then # D0.i16 = v_max_i16(S1.i16, S2.i16) # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then @@ -10722,7 +11194,7 @@ def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then # D0.u16 = v_max_u16(S1.u16, S2.u16) # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then @@ -10745,7 +11217,7 @@ def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 * S1.i16 + S2.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -10757,7 +11229,7 @@ def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f16) ^ sign(S2.f16)); # if isNAN(64'F(S2.f16)) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) @@ -10802,7 +11274,7 @@ def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10814,7 +11286,7 @@ def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10826,7 +11298,7 @@ def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10838,7 +11310,7 @@ def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32 | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10850,7 +11322,7 @@ def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10862,7 +11334,7 @@ def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -10874,7 +11346,7 @@ def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = VCC.u64[laneId] ? 
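
V_ADD3_U32, V_LSHL_OR_B32, V_AND_OR_B32 and V_OR3_B32 above all operate on wrapped 32-bit values, and the shift consumes only S1[4:0]. A self-contained model under those assumptions:

MASK32 = 0xFFFFFFFF

def add3_u32(s0, s1, s2):
  return (s0 + s1 + s2) & MASK32               # wraps like the hardware adder

def lshl_or_b32(s0, s1, s2):
  return ((s0 << (s1 & 0x1F)) | s2) & MASK32   # only the low 5 shift bits count

assert add3_u32(0xFFFFFFFF, 1, 5) == 5
assert lshl_or_b32(1, 36, 0xF) == (1 << 4) | 0xF  # 36 & 31 == 4
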
S1.u16 : S0.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -10888,7 +11360,7 @@ def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10900,7 +11372,7 @@ def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10912,7 +11384,7 @@ def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10924,7 +11396,7 @@ def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10936,7 +11408,7 @@ def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f16; # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; @@ -10955,7 +11427,7 @@ def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.bf16; # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; @@ -10974,7 +11446,7 @@ def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10986,7 +11458,7 @@ def _VOP3Op_V_MINMAX_NUM_F32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10998,7 +11470,7 @@ def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -11010,7 +11482,7 @@ def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -11022,7 +11494,7 @@ def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_maximum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -11034,7 +11506,7 @@ def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_minimum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -11046,7 +11518,7 @@ def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_maximum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -11058,7 +11530,7 @@ def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_minimum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -11070,7 +11542,7 @@ def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite 
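
The *_NUM_* and MINIMUMMAXIMUM/MAXIMUMMINIMUM families above differ only in NaN policy: v_min_num/v_max_num drop a quiet NaN in favor of the other operand, while v_minimum/v_maximum propagate it. A sketch of the two policies (function names here are illustrative):

import math

def min_num(a, b):        # minNum flavor: the NaN operand loses
  if math.isnan(a): return b
  if math.isnan(b): return a
  return min(a, b)

def minimum(a, b):        # IEEE 754-2019 minimum flavor: NaN wins
  return math.nan if math.isnan(a) or math.isnan(b) else min(a, b)

assert min_num(math.nan, 2.0) == 2.0
assert math.isnan(minimum(math.nan, 2.0))
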
result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -11080,7 +11552,7 @@ def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16); # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11092,7 +11564,7 @@ def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -11102,7 +11574,7 @@ def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16); # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11114,7 +11586,7 @@ def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -11124,7 +11596,7 @@ def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16; # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11136,7 +11608,7 @@ def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -11146,7 +11618,7 @@ def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16); # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11158,7 
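
The f16 variants of the V_S_* transcendental ops write the 16-bit result into D0[15:0] and zero D0[31:16]. A sketch of that packing using Python's half-precision struct code 'e' (rounding here is struct's, which is close to but not guaranteed bit-exact with the hardware tables):

import math, struct

def f16_bits(x):
  return struct.unpack('<H', struct.pack('<e', x))[0]

def f16_val(bits):
  return struct.unpack('<e', struct.pack('<H', bits & 0xFFFF))[0]

def v_s_log_f16(s0_bits):
  d0 = f16_bits(math.log2(f16_val(s0_bits)))
  return d0 & 0xFFFF                            # D0[31 : 16] = 16'0x0

assert v_s_log_f16(f16_bits(8.0)) == f16_bits(3.0)
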
+11630,7 @@ def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -11168,7 +11640,7 @@ def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16); # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11180,7 +11652,7 @@ def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 + S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11191,7 +11663,7 @@ def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 - S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11202,7 +11674,7 @@ def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11213,7 +11685,7 @@ def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); @@ -11227,7 +11699,7 @@ def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); @@ -11241,7 +11713,7 @@ def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11252,7 +11724,7 @@ def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11263,7 +11735,7 @@ def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11274,7 +11746,7 @@ def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11285,7 +11757,7 @@ def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 + S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11296,7 +11768,7 @@ def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 - S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11307,7 +11779,7 @@ def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[31 : 16].f16 = S1.f16; # D0[15 : 0].f16 = S0.f16 S0 = Reg(s0) @@ -11320,7 +11792,7 @@ def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); @@ -11334,7 +11806,7 @@ def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, 
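
The V_CVT_PK_* ops convert two sources and pack them as halfwords, S0 into [15:0] and S1 into [31:16]. A sketch of V_CVT_PK_I16_F32, assuming the conversion truncates toward zero and clamps to the i16 range as the ISA's float-to-int helpers do:

def cvt_i16_sat(x):
  return max(-32768, min(32767, int(x))) & 0xFFFF  # truncate toward zero, clamp

def cvt_pk_i16_f32(s0, s1):
  return (cvt_i16_sat(s1) << 16) | cvt_i16_sat(s0)

assert cvt_pk_i16_f32(-1.0, 2.5) == (2 << 16) | 0xFFFF  # -1 packs as 0xFFFF
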
lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); @@ -11348,7 +11820,7 @@ def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * 2.0F ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11359,7 +11831,7 @@ def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11370,7 +11842,7 @@ def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32; # for i in 0 : 31 do # tmp += S0[i].u32; @@ -11390,7 +11862,7 @@ def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); @@ -11404,7 +11876,7 @@ def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); @@ -11418,7 +11890,7 @@ def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = u32_to_u16(S0.u32); # tmp[31 : 16].u16 = u32_to_u16(S1.u32); @@ -11432,7 +11904,7 @@ def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_I16_I32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = i32_to_i16(S0.i32); # tmp[31 : 16].i16 = i32_to_i16(S1.i32); @@ -11446,7 +11918,7 @@ def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11457,7 +11929,7 @@ def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 + S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11468,7 +11940,7 @@ def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * 2.0 ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11480,7 +11952,7 @@ def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 * S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -11491,7 +11963,7 @@ def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -11502,7 +11974,7 @@ def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -11513,7 +11985,7 @@ def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11524,7 +11996,7 @@ def 
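
V_MUL_HI_U32/I32 above return the upper 32 bits of a 64-bit product; the signed form first reinterprets both operands as two's-complement. Sketch:

def to_i32(x):
  return x - 0x100000000 if x & 0x80000000 else x

def mul_hi_u32(a, b):
  return ((a * b) >> 32) & 0xFFFFFFFF

def mul_hi_i32(a, b):
  return ((to_i32(a) * to_i32(b)) >> 32) & 0xFFFFFFFF

assert mul_hi_u32(0xFFFFFFFF, 0xFFFFFFFF) == 0xFFFFFFFE
assert mul_hi_i32(0xFFFFFFFF, 0xFFFFFFFF) == 0   # (-1) * (-1) = 1, high half 0
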
_VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11535,7 +12007,7 @@ def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11546,7 +12018,7 @@ def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11558,7 +12030,7 @@ def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (S1.i64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11570,7 +12042,7 @@ def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then # TRAPSTS.INVALID = 1 # endif; @@ -11612,7 +12084,7 @@ def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then # TRAPSTS.INVALID = 1 # endif; @@ -11654,7 +12126,7 @@ def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE32 then # lane = S1.u32[4 : 0].u32; @@ -11677,7 +12149,7 @@ def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 & S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11688,7 +12160,7 @@ def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 | S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11699,7 +12171,7 @@ def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 ^ S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11710,7 +12182,7 @@ def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -11751,7 +12223,7 @@ def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -11792,7 +12264,7 @@ def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -11833,7 +12305,7 @@ def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -12205,6 +12677,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MQSAD_U32_U8: _VOP3Op_V_MQSAD_U32_U8, VOP3Op.V_XOR3_B32: _VOP3Op_V_XOR3_B32, VOP3Op.V_MAD_U16: _VOP3Op_V_MAD_U16, + VOP3Op.V_PERM_B32: _VOP3Op_V_PERM_B32, VOP3Op.V_XAD_U32: _VOP3Op_V_XAD_U32, VOP3Op.V_LSHL_ADD_U32: _VOP3Op_V_LSHL_ADD_U32, VOP3Op.V_ADD_LSHL_U32: _VOP3Op_V_ADD_LSHL_U32, @@ -12292,7 +12765,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MAXIMUM_F16: _VOP3Op_V_MAXIMUM_F16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
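
V_MINIMUM_F32/V_MAXIMUM_F32 above first raise TRAPSTS.INVALID on signaling NaNs. For binary32, a NaN is signaling when the quiet bit (mantissa bit 22) is clear; a bit-level sketch:

def is_signaling_nan_f32(bits):
  exp_all_ones = ((bits >> 23) & 0xFF) == 0xFF
  mantissa_nonzero = (bits & 0x7FFFFF) != 0
  quiet_bit = (bits >> 22) & 1
  return exp_all_ones and mantissa_nonzero and quiet_bit == 0

assert is_signaling_nan_f32(0x7F800001)      # sNaN
assert not is_signaling_nan_f32(0x7FC00000)  # qNaN
assert not is_signaling_nan_f32(0x7F800000)  # +inf
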
_VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -12312,7 +12785,7 @@ def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12332,7 +12805,7 @@ def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12352,7 +12825,7 @@ def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # D0.f32 = NAN.f32 @@ -12415,7 +12888,7 @@ def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then # D0.f64 = NAN.f64 @@ -12479,7 +12952,7 @@ def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) S0 = Reg(s0) S1 = Reg(s1) @@ -12496,7 +12969,7 @@ def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d1'] = D1._val & 1 return result -def _VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) S0 = Reg(s0) S1 = Reg(s1) @@ -12513,7 +12986,7 @@ def 
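
The VOP3SD carry ops compute a wide intermediate and write the carry-out into VCC's per-lane bit, exactly as the V_ADD_CO_CI_U32 body above does. A standalone model:

def add_co_ci_u32(s0, s1, vcc, lane):
  tmp = s0 + s1 + ((vcc >> lane) & 1)          # 64'U(S0) + 64'U(S1) + carry-in
  carry_out = 1 if tmp >= 0x100000000 else 0   # unsigned overflow
  vcc = (vcc & ~(1 << lane)) | (carry_out << lane)
  return tmp & 0xFFFFFFFF, vcc

d0, vcc = add_co_ci_u32(0xFFFFFFFF, 0, vcc=1, lane=0)
assert (d0, vcc) == (0, 1)   # wraps to 0, carry-out lands back in lane 0's bit
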
_VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d1'] = D1._val & 1 return result -def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -12533,7 +13006,7 @@ def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12553,7 +13026,7 @@ def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32; # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12586,7 +13059,7 @@ VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; @@ -12604,7 +13077,7 @@ def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12620,7 +13093,7 @@ def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; @@ -12637,7 +13110,7 @@ def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
_vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; @@ -12654,7 +13127,7 @@ def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12670,7 +13143,7 @@ def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12686,7 +13159,7 @@ def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12702,7 +13175,7 @@ def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; @@ -12719,7 +13192,7 @@ def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? 
S0[31 : 16].i16 : S1[31 : 16].i16; @@ -12736,7 +13209,7 @@ def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; @@ -12754,7 +13227,7 @@ def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; @@ -12771,7 +13244,7 @@ def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; @@ -12788,7 +13261,7 @@ def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; @@ -12805,7 +13278,7 @@ def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? 
S0[31 : 16].u16 : S1[31 : 16].u16; @@ -12822,7 +13295,7 @@ def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); @@ -12840,7 +13313,7 @@ def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; @@ -12857,7 +13330,7 @@ def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; @@ -12874,7 +13347,7 @@ def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -12893,7 +13366,7 @@ def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); @@ -12916,7 +13389,7 @@ def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); @@ -12947,7 +13420,7 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -12966,7 +13439,7 @@ def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_min_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_min_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -12983,7 +13456,7 @@ def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_max_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_max_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -13000,7 +13473,7 @@ def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_minimum_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_minimum_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -13017,7 +13490,7 @@ def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_maximum_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_maximum_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -13034,7 +13507,7 @@ def _VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].bf8); # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].bf8); @@ -13057,7 +13530,7 @@ def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += 
32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].fp8); # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].fp8); @@ -13080,7 +13553,7 @@ def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].fp8); # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].fp8); @@ -13103,7 +13576,7 @@ def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_F32_BF8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_F32_BF8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += 32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].bf8); # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].bf8); @@ -13158,7 +13631,7 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_DOT4_F32_BF8_BF8: _VOP3POp_V_DOT4_F32_BF8_BF8, } -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. @@ -13167,6 +13640,7 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- @@ -13174,9 +13648,11 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. 
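
This is also where the commit's headline fix lands: VOP3Op.V_PERM_B32 is now registered in VOP3Op_FUNCTIONS. V_PERM_B32 builds each destination byte by indexing the 8-byte pool {S0, S1} with a selector byte from S2. A sketch covering the byte-select and constant selectors (the sign-replicating selectors 8-11 are elided here; per the ISA they broadcast sign bits):

def v_perm_b32(s0, s1, s2):
  pool = (s0 << 32) | s1             # S1 supplies bytes 0-3, S0 bytes 4-7
  out = 0
  for i in range(4):
    sel = (s2 >> (8 * i)) & 0xFF     # one selector byte per result byte
    if sel <= 7:    byte = (pool >> (8 * sel)) & 0xFF
    elif sel == 12: byte = 0x00      # constant selectors
    elif sel >= 13: byte = 0xFF
    else:           byte = 0x00      # 8-11: sign replication, omitted
    out |= byte << (8 * i)
  return out

assert v_perm_b32(0xAABBCCDD, 0x11223344, 0x0C0C0400) == 0x0000DD44
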
@@ -13185,6 +13661,7 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- @@ -13192,9 +13669,11 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13202,6 +13681,7 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- @@ -13209,9 +13689,11 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. @@ -13220,6 +13702,7 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- @@ -13227,9 +13710,11 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13237,6 +13722,7 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- @@ -13244,9 +13730,11 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13254,6 +13742,7 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- @@ -13261,9 +13750,11 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -13272,6 +13763,7 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- @@ -13279,9 +13771,11 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
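The F16 compares in this stretch all read their operands through the .f16 view on Reg (defined in pcode.py, not shown here). Whatever its internals, the view it must provide is the standard IEEE binary16 reinterpretation of the low 16 register bits, which Python's struct module expresses directly with the 'e' format — a sketch for illustration only:

import struct

def as_f16_sketch(bits: int) -> float:
    # reinterpret the low 16 bits of a register value as an IEEE half
    return struct.unpack('<e', struct.pack('<H', bits & 0xFFFF))[0]

assert as_f16_sketch(0x3C00) == 1.0
assert as_f16_sketch(0xC400) == -4.0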
@@ -13290,6 +13784,7 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- @@ -13297,9 +13792,11 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -13308,6 +13805,7 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- @@ -13315,9 +13813,11 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -13326,6 +13826,7 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- @@ -13333,9 +13834,11 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -13345,6 +13848,7 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- @@ -13352,9 +13856,11 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -13363,6 +13869,7 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- @@ -13370,9 +13877,11 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -13382,6 +13891,7 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- @@ -13389,9 +13899,11 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -13401,6 +13913,7 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- @@ -13408,9 +13921,11 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -13419,6 +13934,7 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- @@ -13426,9 +13942,11 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -13437,6 +13955,7 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- @@ -13444,9 +13963,11 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13454,6 +13975,7 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- @@ -13461,9 +13983,11 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -13472,6 +13996,7 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- @@ -13479,9 +14004,11 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13489,6 +14016,7 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- @@ -13496,9 +14024,11 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13506,6 +14036,7 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- @@ -13513,9 +14044,11 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -13524,6 +14057,7 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- @@ -13531,9 +14065,11 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -13542,6 +14078,7 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- @@ -13549,9 +14086,11 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. 
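V_CMP_O_F32 and V_CMP_U_F32 above are exact complements: ordered means neither operand is NaN, unordered means at least one is, so exactly one of the two predicates holds for any input pair. In plain Python:

import math

def cmp_o(a: float, b: float) -> bool:
    # ordered: both inputs are comparable (neither is NaN)
    return not math.isnan(a) and not math.isnan(b)

def cmp_u(a: float, b: float) -> bool:
    # unordered: at least one input is NaN
    return math.isnan(a) or math.isnan(b)

assert cmp_o(1.0, 2.0) and not cmp_u(1.0, 2.0)
assert cmp_u(float('nan'), 2.0) and not cmp_o(float('nan'), 2.0)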
@@ -13560,6 +14099,7 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- @@ -13567,9 +14107,11 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -13578,6 +14120,7 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- @@ -13585,9 +14128,11 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -13597,6 +14142,7 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- @@ -13604,9 +14150,11 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. 
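The recurring "// With NAN inputs this is not the same operation as ..." comments are the whole point of the NGE/NLG/NGT/NLE/NEQ/NLT family: the negation is applied after the IEEE compare, so a NaN operand makes the N-variant true where the direct compare would be false. Python floats follow the same IEEE rules, so the distinction can be checked directly:

nan = float('nan')

assert (nan < 1.0) is False        # V_CMP_LT: every compare against NaN is false
assert (not (nan >= 1.0)) is True  # V_CMP_NGE: negating after the compare flips it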
@@ -13615,6 +14163,7 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- @@ -13622,9 +14171,11 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -13634,6 +14185,7 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- @@ -13641,9 +14193,11 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -13653,6 +14207,7 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- @@ -13660,9 +14215,11 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. 
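Every handler in this patch now takes pc and reports it back as new_pc; the guard before the assignment reinterprets the 64-bit unsigned register value as a signed byte address. The compares themselves never write PC, so new_pc just echoes the incoming value — the signed view matters for the PC-arithmetic ops (branches and friends) handled elsewhere in the patch. Stand-alone, the conversion is plain two's complement:

def to_signed64(v: int) -> int:
    # two's-complement view of a 64-bit value, mirroring the _pc guard above
    return v if v < 0x8000000000000000 else v - 0x10000000000000000

assert to_signed64(0xFFFFFFFFFFFFFFFF) == -1
assert to_signed64(0x1000) == 0x1000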
@@ -13671,6 +14228,7 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -13678,9 +14236,11 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -13689,6 +14249,7 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -13696,9 +14257,11 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13706,6 +14269,7 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -13713,9 +14277,11 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. 
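The F64 compares read a full 64-bit operand; the .f64 view Reg must supply (again pcode.py, not shown) is the usual bit cast from the low 64 register bits to an IEEE double — sketched here for reference only:

import struct

def as_f64_sketch(bits: int) -> float:
    # reinterpret the low 64 bits of a register value as an IEEE double
    return struct.unpack('<d', struct.pack('<Q', bits & 0xFFFFFFFFFFFFFFFF))[0]

assert as_f64_sketch(0x3FF0000000000000) == 1.0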
@@ -13724,6 +14290,7 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -13731,9 +14298,11 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13741,6 +14310,7 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -13748,9 +14318,11 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13758,6 +14330,7 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -13765,9 +14338,11 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
@@ -13776,6 +14351,7 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -13783,9 +14359,11 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -13794,6 +14372,7 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -13801,9 +14380,11 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -13812,6 +14393,7 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -13819,9 +14401,11 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
@@ -13830,6 +14414,7 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -13837,9 +14422,11 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -13849,6 +14436,7 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -13856,9 +14444,11 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -13867,6 +14457,7 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -13874,9 +14465,11 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -13886,6 +14479,7 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -13893,9 +14487,11 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -13905,6 +14501,7 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -13912,9 +14509,11 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -13923,6 +14522,7 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -13930,9 +14530,11 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -13941,6 +14543,7 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -13948,9 +14551,11 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13958,6 +14563,7 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -13965,9 +14571,11 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -13976,6 +14584,7 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -13983,9 +14592,11 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
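The I16 compares are signed, so the .i16 view has to sign-extend the low half-word before comparing; the equivalent stand-alone operation:

def as_i16_sketch(bits: int) -> int:
    # sign-extend the low 16 bits to a Python int
    v = bits & 0xFFFF
    return v - 0x10000 if v & 0x8000 else v

assert as_i16_sketch(0xFFFF) == -1
assert as_i16_sketch(0x7FFF) == 32767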
@@ -13994,6 +14605,7 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -14001,9 +14613,11 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14011,6 +14625,7 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -14018,9 +14633,11 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -14029,6 +14646,7 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -14036,9 +14654,11 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -14047,6 +14667,7 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -14054,9 +14675,11 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14064,6 +14687,7 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -14071,9 +14695,11 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -14082,6 +14708,7 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -14089,9 +14716,11 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -14100,6 +14729,7 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -14107,9 +14737,11 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14117,6 +14749,7 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -14124,9 +14757,11 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -14135,6 +14770,7 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -14142,9 +14778,11 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
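As with every VOPC handler in this file, the compares run once per active lane: a driver (emu.py in this patch, which passes more state than shown) calls the handler per lane and folds the returned vcc_lane bit into a 64-bit mask. A deliberately simplified sketch of that fold, with the unused operands stubbed out — the real dispatch differs:

def fold_vopc_sketch(cmp_fn, s0_lanes, s1_lanes, exec_mask, vcc=0):
    # gather per-lane compare results into a VCC-style 64-bit mask;
    # lanes masked off by exec_mask keep their previous bit
    out = vcc
    for lane in range(64):
        if not (exec_mask >> lane) & 1:
            continue
        r = cmp_fn(s0_lanes[lane], s1_lanes[lane], 0, 0, 0, vcc, lane,
                   exec_mask, 0, None, {})
        out = (out & ~(1 << lane)) | ((r['vcc_lane'] & 1) << lane)
    return out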
@@ -14153,6 +14791,7 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -14160,9 +14799,11 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14170,6 +14811,7 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -14177,9 +14819,11 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -14188,6 +14832,7 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -14195,9 +14840,11 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
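Worth noting beside the I32 compares above and the U32 variants that follow: both read the same 32 register bits and differ only in interpretation, so the two orderings diverge as soon as the sign bit is set:

def as_i32_sketch(bits: int) -> int:
    # signed view of the low 32 bits
    v = bits & 0xFFFFFFFF
    return v - 0x100000000 if v & 0x80000000 else v

a, b = 0xFFFFFFFF, 0x00000001
assert as_i32_sketch(a) < as_i32_sketch(b)  # V_CMP_LT_I32: -1 < 1 is true
assert not (a < b)                          # V_CMP_LT_U32: 4294967295 < 1 is false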
@@ -14206,6 +14853,7 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -14213,9 +14861,11 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14223,6 +14873,7 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -14230,9 +14881,11 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. @@ -14241,6 +14894,7 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -14248,9 +14902,11 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. 
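Throughout these hunks, `D0.u64[laneId] = <cond>` sets a single bit of a 64-bit lane mask, and the result dict then re-extracts that bit with `(D0._val >> lane) & 1`. Assuming Reg implements bit-indexed assignment as a masked read-modify-write, the update is equivalent to this sketch:

def set_lane_bit(mask: int, lane: int, cond: bool) -> int:
  # clear bit `lane`, then store the comparison result there
  return (mask & ~(1 << lane)) | (int(cond) << lane)

def get_lane_bit(mask: int, lane: int) -> int:
  # what the result dict reports as vcc_lane
  return (mask >> lane) & 1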
@@ -14259,6 +14915,7 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -14266,9 +14923,11 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14276,6 +14935,7 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -14283,9 +14943,11 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. @@ -14294,6 +14956,7 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -14301,9 +14964,11 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -14312,6 +14977,7 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -14319,9 +14985,11 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14329,6 +14997,7 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -14336,9 +15005,11 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -14347,6 +15018,7 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -14354,9 +15026,11 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. 
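Since `new_pc` is commented as an absolute byte address, while the S_GETPC_B64 handling removed in the emu.py diff below shows `st.pc` is kept in 4-byte words, a caller consuming these results would need a byte-to-word conversion. A hypothetical consumer, with `st` and `result` as assumed names:

# hypothetical: st.pc counts 4-byte words, result['new_pc'] is an absolute byte address
if 'new_pc' in result:
  st.pc = result['new_pc'] // 4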
@@ -14365,6 +15039,7 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -14372,9 +15047,11 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14382,6 +15059,7 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -14389,9 +15067,11 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -14400,6 +15080,7 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -14407,9 +15088,11 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. 
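Note that the quoted ISA pseudocode spells not-equal as `<>` (for example `S0.i64 <> S1.i64`) while the compiled body beneath each comment uses Python's `!=`; the pcode compiler presumably normalizes the operator during translation. A rewrite of that kind might be as simple as:

def normalize_ne(expr: str) -> str:
  # map the ISA pseudocode not-equal operator to Python's
  return expr.replace('<>', '!=')

assert normalize_ne('S0.i64 <> S1.i64') == 'S0.i64 != S1.i64'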
@@ -14418,6 +15101,7 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -14425,9 +15109,11 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14435,6 +15121,7 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -14442,9 +15129,11 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. @@ -14453,6 +15142,7 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -14460,9 +15150,11 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -14471,6 +15163,7 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- @@ -14478,9 +15171,11 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14488,6 +15183,7 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- @@ -14495,9 +15191,11 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. @@ -14506,6 +15204,7 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- @@ -14513,9 +15212,11 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -14524,6 +15225,7 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- @@ -14531,9 +15233,11 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14541,6 +15245,7 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- @@ -14548,9 +15253,11 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -14587,6 +15294,7 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -14606,9 +15314,11 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
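For the V_CMP_CLASS_* family, S1 is a bitmask selecting float classes (bit 0 signaling NaN, bit 1 quiet NaN, bit 2 negative infinity, per the comments above), and the lane bit is set if S0 belongs to any selected class. A simplified sketch covering only the classes named in this excerpt; a Python float cannot distinguish signaling from quiet NaNs, so both collapse onto the quiet-NaN bit here:

import math

def cmp_class(x: float, mask: int) -> bool:
  # bits tested: 1 = quiet NaN, 2 = negative infinity (other classes omitted)
  if math.isnan(x): return bool(mask & (1 << 1))
  if math.isinf(x) and x < 0: return bool(mask & (1 << 2))
  return False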
@@ -14645,6 +15355,7 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -14664,9 +15375,11 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -14703,6 +15416,7 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -14722,9 +15436,11 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14737,7 +15453,7 @@ def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 S0 = Reg(s0) @@ -14751,7 +15467,7 @@ def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14764,7 +15480,7 @@ def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14777,7 +15493,7 @@ def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14790,7 +15506,7 @@ def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14803,7 +15519,7 @@ def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -14816,7 +15532,7 @@ def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -14829,7 +15545,7 @@ def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -14843,7 +15559,7 @@ def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -14857,7 +15573,7 @@ def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -14871,7 +15587,7 @@ def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -14885,7 +15601,7 @@ def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -14899,7 +15615,7 @@ def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -14913,7 +15629,7 @@ def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14926,7 +15642,7 @@ def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 S0 = Reg(s0) @@ -14940,7 +15656,7 @@ def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14953,7 +15669,7 @@ def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14966,7 +15682,7 @@ def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14979,7 +15695,7 @@ def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14992,7 +15708,7 @@ def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -15005,7 +15721,7 @@ def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -15018,7 +15734,7 @@ def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -15032,7 +15748,7 @@ def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -15046,7 +15762,7 @@ def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -15060,7 +15776,7 @@ def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -15074,7 +15790,7 @@ def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -15088,7 +15804,7 @@ def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -15102,7 +15818,7 @@ def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 < S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15115,7 +15831,7 @@ def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 S0 = Reg(s0) @@ -15129,7 +15845,7 @@ def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15142,7 +15858,7 @@ def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 > S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15155,7 +15871,7 @@ def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <> S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15168,7 +15884,7 @@ def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 >= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15181,7 +15897,7 @@ def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -15194,7 +15910,7 @@ def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -15207,7 +15923,7 @@ def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -15221,7 +15937,7 @@ def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V 
result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -15235,7 +15951,7 @@ def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -15249,7 +15965,7 @@ def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -15263,7 +15979,7 @@ def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -15277,7 +15993,7 @@ def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -15291,7 +16007,7 @@ def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 < S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15304,7 +16020,7 @@ def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 S0 = Reg(s0) @@ -15318,7 +16034,7 @@ def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15331,7 +16047,7 @@ def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 > S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15344,7 +16060,7 @@ def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <> S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15357,7 +16073,7 @@ def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 >= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15370,7 +16086,7 @@ def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 < S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15383,7 +16099,7 @@ def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 S0 = Reg(s0) @@ -15397,7 +16113,7 @@ def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15410,7 +16126,7 @@ def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 > S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15423,7 +16139,7 @@ def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <> S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15436,7 +16152,7 @@ def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 >= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15449,7 +16165,7 @@ def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15462,7 +16178,7 @@ def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 S0 = Reg(s0) @@ -15476,7 +16192,7 @@ def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15489,7 +16205,7 @@ def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15502,7 +16218,7 @@ def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15515,7 +16231,7 @@ def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15528,7 +16244,7 @@ def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15541,7 +16257,7 @@ def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC
   # EXEC.u64[laneId] = S0.u32 == S1.u32
   S0 = Reg(s0)
@@ -15555,7 +16271,7 @@ def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u32 <= S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15568,7 +16284,7 @@ def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u32 > S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15581,7 +16297,7 @@ def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u32 <> S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15594,7 +16310,7 @@ def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u32 >= S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15607,7 +16323,7 @@ def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.i64 < S1.i64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15620,7 +16336,7 @@ def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = S0.i64 == S1.i64
   S0 = Reg(s0)
@@ -15634,7 +16350,7 @@ def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.i64 <= S1.i64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15647,7 +16363,7 @@ def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.i64 > S1.i64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15660,7 +16376,7 @@ def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.i64 <> S1.i64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15673,7 +16389,7 @@ def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.i64 >= S1.i64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15686,7 +16402,7 @@ def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u64 < S1.u64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15699,7 +16415,7 @@ def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = S0.u64 == S1.u64
   S0 = Reg(s0)
@@ -15713,7 +16429,7 @@ def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u64 <= S1.u64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15726,7 +16442,7 @@ def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u64 > S1.u64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15739,7 +16455,7 @@ def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u64 <> S1.u64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15752,7 +16468,7 @@ def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.u64 >= S1.u64
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -15765,7 +16481,7 @@ def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
   # S1.u[2] value is negative infinity.
@@ -15818,7 +16534,7 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # S1.u[0] value is a signaling NAN.
   # S1.u[1] value is a quiet NAN.
   # S1.u[2] value is negative infinity.
@@ -15871,7 +16587,7 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
-def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # S1.u[0] value is a signaling NAN.
  # S1.u[1] value is a quiet NAN.
  # S1.u[2] value is negative infinity.
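[Editor's note] The gen_pcode hunks above are purely mechanical: every autogenerated handler grows a trailing pc=0 parameter. The consumers are the PC-relative scalar ops (S_GETPC_B64, S_SETPC_B64 and the SOPP branches), whose pseudocode mentions PC and is now compiled like any other instruction instead of being special-cased in emu.py below. A sketch of roughly what the generator produces for S_SETPC_B64, assembled from the pcode.py rules in this patch; illustrative only, not copied from the autogen output:

def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  # PC = S0.u64
  S0 = Reg(s0)
  PC = Reg(pc)  # pc is passed in as a byte address (emu.py sends st.pc * 4)
  PC = Reg(S0.u64)  # _assign() wraps bare PC targets in Reg(...), per the pcode.py hunk below
  result = {'scc': scc & 1}  # the exact result dict the generator emits may differ
  # has_pc tail: report the new PC as an absolute byte address; emu.py turns it into a word delta
  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
  result['new_pc'] = _pc
  return result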
diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py
index dbbd33b820..7e9dbd014b 100644
--- a/extra/assembly/amd/emu.py
+++ b/extra/assembly/amd/emu.py
@@ -205,21 +205,11 @@ def exec_scalar(st: WaveState, inst: Inst) -> int:
   compiled = _get_compiled()
   inst_type = type(inst)
 
-  # SOPP: control flow (not ALU)
+  # SOPP: special cases for control flow that has no pseudocode
   if inst_type is SOPP:
     op = inst.op
     if op == SOPPOp.S_ENDPGM: return -1
     if op == SOPPOp.S_BARRIER: return -2
-    if op == SOPPOp.S_BRANCH: return _sext(inst.simm16, 16)
-    if op == SOPPOp.S_CBRANCH_SCC0: return _sext(inst.simm16, 16) if st.scc == 0 else 0
-    if op == SOPPOp.S_CBRANCH_SCC1: return _sext(inst.simm16, 16) if st.scc == 1 else 0
-    if op == SOPPOp.S_CBRANCH_VCCZ: return _sext(inst.simm16, 16) if (st.vcc & 0xffffffff) == 0 else 0
-    if op == SOPPOp.S_CBRANCH_VCCNZ: return _sext(inst.simm16, 16) if (st.vcc & 0xffffffff) != 0 else 0
-    if op == SOPPOp.S_CBRANCH_EXECZ: return _sext(inst.simm16, 16) if st.exec_mask == 0 else 0
-    if op == SOPPOp.S_CBRANCH_EXECNZ: return _sext(inst.simm16, 16) if st.exec_mask != 0 else 0
-    # Valid SOPP range is 0-61 (max defined opcode); anything above is invalid
-    if op > 61: raise NotImplementedError(f"Invalid SOPP opcode {op}")
-    return 0  # waits, hints, nops
 
   # SMEM: memory loads (not ALU)
   if inst_type is SMEM:
@@ -229,46 +219,39 @@ def exec_scalar(st: WaveState, inst: Inst) -> int:
     for i in range(cnt): st.wsgpr(inst.sdata + i, mem_read((addr + i * 4) & 0xffffffffffffffff, 4))
     return 0
 
-  # SOP1: special handling for ops not in pseudocode
-  if inst_type is SOP1:
-    op = SOP1Op(inst.op)
-    # S_GETPC_B64: Get program counter (PC is stored as byte offset, convert from words)
-    if op == SOP1Op.S_GETPC_B64:
-      pc_bytes = st.pc * 4  # PC is in words, convert to bytes
-      st.wsgpr64(inst.sdst, pc_bytes)
-      return 0
-    # S_SETPC_B64: Set program counter to source value (indirect jump)
-    # Returns delta such that st.pc + inst_words + delta = target_words
-    if op == SOP1Op.S_SETPC_B64:
-      target_bytes = st.rsrc64(inst.ssrc0, 0)
-      target_words = target_bytes // 4
-      inst_words = 1  # SOP1 is always 1 word
-      return target_words - st.pc - inst_words
-
   # Get op enum and lookup compiled function
   if inst_type is SOP1: op_cls, ssrc0, sdst = SOP1Op, inst.ssrc0, inst.sdst
   elif inst_type is SOP2: op_cls, ssrc0, sdst = SOP2Op, inst.ssrc0, inst.sdst
   elif inst_type is SOPC: op_cls, ssrc0, sdst = SOPCOp, inst.ssrc0, None
   elif inst_type is SOPK: op_cls, ssrc0, sdst = SOPKOp, inst.sdst, inst.sdst  # sdst is both src and dst
+  elif inst_type is SOPP: op_cls, ssrc0, sdst = SOPPOp, None, None
   else: raise NotImplementedError(f"Unknown scalar type {inst_type}")
-  op = op_cls(inst.op)
+  # SOPP has gaps in the opcode enum - treat unknown opcodes as no-ops
+  try: op = op_cls(inst.op)
+  except ValueError:
+    if inst_type is SOPP: return 0
+    raise
   fn = compiled.get(op_cls, {}).get(op)
-  if fn is None: raise NotImplementedError(f"{op.name} not in pseudocode")
+  if fn is None:
+    # SOPP instructions without pseudocode (waits, hints, nops) are no-ops
+    if inst_type is SOPP: return 0
+    raise NotImplementedError(f"{op.name} not in pseudocode")
 
   # Build context - handle 64-bit ops that need 64-bit source reads
   # 64-bit source ops: name ends with _B64, _I64, _U64 or contains _U64, _I64 before last underscore
   is_64bit_s0 = op.name.endswith(('_B64', '_I64', '_U64')) or '_U64_' in op.name or '_I64_' in op.name
   is_64bit_s0s1 = op_cls is SOPCOp and op in (SOPCOp.S_CMP_EQ_U64, SOPCOp.S_CMP_LG_U64)
-  s0 = st.rsrc64(ssrc0, 0) if is_64bit_s0 or is_64bit_s0s1 else (st.rsrc(ssrc0, 0) if inst_type != SOPK else st.rsgpr(inst.sdst))
+  s0 = st.rsrc64(ssrc0, 0) if is_64bit_s0 or is_64bit_s0s1 else (st.rsrc(ssrc0, 0) if inst_type not in (SOPK, SOPP) else (st.rsgpr(inst.sdst) if inst_type is SOPK else 0))
   is_64bit_sop2 = is_64bit_s0 and inst_type is SOP2
   s1 = st.rsrc64(inst.ssrc1, 0) if (is_64bit_sop2 or is_64bit_s0s1) else (st.rsrc(inst.ssrc1, 0) if inst_type in (SOP2, SOPC) else inst.simm16 if inst_type is SOPK else 0)
   d0 = st.rsgpr64(sdst) if (is_64bit_s0 or is_64bit_s0s1) and sdst is not None else (st.rsgpr(sdst) if sdst is not None else 0)
   exec_mask = st.exec_mask
-  literal = inst.simm16 if inst_type is SOPK else st.literal
+  literal = inst.simm16 if inst_type in (SOPK, SOPP) else st.literal
 
-  # Execute compiled function
-  result = fn(s0, s1, 0, d0, st.scc, st.vcc, 0, exec_mask, literal, None, {})
+  # Execute compiled function - pass PC in bytes for instructions that need it
+  pc_bytes = st.pc * 4
+  result = fn(s0, s1, 0, d0, st.scc, st.vcc, 0, exec_mask, literal, None, {}, pc=pc_bytes)
 
   # Apply results
   if sdst is not None:
@@ -278,7 +261,11 @@ def exec_scalar(st: WaveState, inst: Inst) -> int:
     st.wsgpr(sdst, result['d0'])
   if 'scc' in result: st.scc = result['scc']
   if 'exec' in result: st.exec_mask = result['exec']
-  if 'pc_delta' in result: return result['pc_delta']
+  if 'new_pc' in result:
+    # Convert absolute byte address to word delta
+    # new_pc is where we want to go, st.pc is current position, inst._words will be added after
+    new_pc_words = result['new_pc'] // 4
+    return new_pc_words - st.pc - 1  # -1 because emulator adds inst_words (1 for scalar)
   return 0
 
 def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = None) -> None:
@@ -402,20 +389,6 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No
       op_cls, op, src0, src1, src2, vdst = VOPCOp, VOPCOp(inst.op), inst.src0, inst.src1, None, inst.vdst
     else:
       op_cls, op, src0, src1, src2, vdst = VOP3Op, VOP3Op(inst.op), inst.src0, inst.src1, inst.src2, inst.vdst
-    # V_PERM_B32: byte permutation - not in pseudocode PDF, implement directly
-    # D0[byte_i] = selector[byte_i] < 8 ? {src0, src1}[selector[byte_i]] : (selector[byte_i] >= 0xD ? 0xFF : 0x00)
-    if op == VOP3Op.V_PERM_B32:
-      s0, s1, s2 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane), st.rsrc(inst.src2, lane)
-      # Combine src1 and src0 into 8-byte value: src1 is bytes 0-3, src0 is bytes 4-7
-      combined = (s1 & 0xffffffff) | ((s0 & 0xffffffff) << 32)
-      result = 0
-      for i in range(4):  # 4 result bytes
-        sel = (s2 >> (i * 8)) & 0xff  # byte selector for this position
-        if sel <= 7: result |= (((combined >> (sel * 8)) & 0xff) << (i * 8))  # select byte from combined
-        elif sel >= 0xd: result |= (0xff << (i * 8))  # 0xD-0xF: constant 0xFF
-        # else 0x8-0xC: constant 0x00 (already 0)
-      V[vdst] = result & 0xffffffff
-      return
   elif inst_type is VOPC:
     op = VOPCOp(inst.op)
     # For 16-bit VOPC, vsrc1 uses same encoding as VOP2 16-bit: bit 7 selects hi(1) or lo(0) half
diff --git a/extra/assembly/amd/pcode.py b/extra/assembly/amd/pcode.py
index 05b23d7528..c230384324 100644
--- a/extra/assembly/amd/pcode.py
+++ b/extra/assembly/amd/pcode.py
@@ -341,7 +341,7 @@ def F(x):
   if isinstance(x, int): return _f32(x)  # int -> interpret as f32 bits
   if isinstance(x, TypedView): return x  # preserve TypedView for bit-pattern checks
   return float(x)  # already a float or float-like
-signext = lambda x: x
+signext = lambda x: int(x)  # sign-extend to full width - already handled by Python's arbitrary precision ints
 pack = lambda hi, lo: ((int(hi) & 0xffff) << 16) | (int(lo) & 0xffff)
 pack32 = lambda hi, lo: ((int(hi) & 0xffffffff) << 32) | (int(lo) & 0xffffffff)
 _pack, _pack32 = pack, pack32  # Aliases for internal use
@@ -519,6 +519,17 @@ class TypedView:
   def __bool__(s): return bool(int(s))
 
+  # Allow chained type access like jump_addr.i64 when jump_addr is already a TypedView
+  # These just return self or convert appropriately
+  @property
+  def i64(s): return s if s._bits == 64 and s._signed else int(s)
+  @property
+  def u64(s): return s if s._bits == 64 and not s._signed else int(s) & MASK64
+  @property
+  def i32(s): return s if s._bits == 32 and s._signed else _sext(int(s) & MASK32, 32)
+  @property
+  def u32(s): return s if s._bits == 32 and not s._signed else int(s) & MASK32
+
 class Reg:
   """GPU register: D0.f32 = S0.f32 + S1.f32 just works."""
   __slots__ = ('_val',)
@@ -542,6 +553,7 @@ class Reg:
   bf16 = property(lambda s: TypedView(s, 16, is_float=True, is_bf16=True), lambda s, v: setattr(s, '_val', (s._val & 0xffff0000) | ((v if isinstance(v, int) else _ibf16(float(v))) & 0xffff)))
   u8 = property(lambda s: TypedView(s, 8))
   i8 = property(lambda s: TypedView(s, 8, signed=True))
+  u1 = property(lambda s: TypedView(s, 1))  # single bit
 
   def __getitem__(s, key):
     if isinstance(key, slice): return SliceProxy(s, int(key.start), int(key.stop))
@@ -664,7 +676,7 @@ def compile_pseudocode(pseudocode: str) -> str:
 
   def _assign(lhs: str, rhs: str) -> str:
     """Generate assignment. Bare tmp/SCC/etc get wrapped in Reg()."""
-    if lhs in ('tmp', 'SCC', 'VCC', 'EXEC', 'D0', 'D1', 'saveexec'):
+    if lhs in ('tmp', 'SCC', 'VCC', 'EXEC', 'D0', 'D1', 'saveexec', 'PC'):
       return f"{lhs} = Reg({rhs})"
     return f"{lhs} = {rhs}"
@@ -801,14 +813,14 @@
 INST_PATTERN = re.compile(r'^([SV]_[A-Z0-9_]+)\s+(\d+)\s*$', re.M)
 
 # Patterns that can't be handled by the DSL (require special handling in emu.py)
 UNSUPPORTED = ['SGPR[', 'V_SWAP', 'eval ', 'FATAL_HALT', 'HW_REGISTERS',
-               'PC =', 'PC=', 'PC+', '= PC', 'vscnt', 'vmcnt', 'expcnt', 'lgkmcnt',
+               'vscnt', 'vmcnt', 'expcnt', 'lgkmcnt',
                'CVT_OFF_TABLE', 'ThreadMask', 'S1[i', 'C.i32', 'S[i]', 'in[',
               '2.0 / PI', 'if n.', 'DST.u32', 'addrd = DST', 'addr = DST']  # Malformed pseudocode from PDF
 
 def extract_pseudocode(text: str) -> str | None:
   """Extract pseudocode from an instruction description snippet."""
-  lines, result, depth = text.split('\n'), [], 0
+  lines, result, depth, in_lambda = text.split('\n'), [], 0, 0
   for line in lines:
     s = line.strip()
     if not s: continue
@@ -817,12 +829,17 @@
     # Skip document headers (RDNA or CDNA)
     if s.startswith('"RDNA') or s.startswith('AMD ') or s.startswith('CDNA'): continue
     if s.startswith('Notes') or s.startswith('Functional examples'): break
+    # Track lambda definitions (e.g., BYTE_PERMUTE = lambda(data, sel) (...))
+    if '= lambda(' in s: in_lambda += 1; continue
+    if in_lambda > 0:
+      if s.endswith(');'): in_lambda -= 1
+      continue
     if s.startswith('if '): depth += 1
     elif s.startswith('endif'): depth = max(0, depth - 1)
     if s.endswith('.') and not any(p in s for p in ['D0', 'D1', 'S0', 'S1', 'S2', 'SCC', 'VCC', 'tmp', '=']): continue
     if re.match(r'^[a-z].*\.$', s) and '=' not in s: continue
     is_code = (
-      any(p in s for p in ['D0.', 'D1.', 'S0.', 'S1.', 'S2.', 'SCC =', 'SCC ?', 'VCC', 'EXEC', 'tmp =', 'tmp[', 'lane =']) or
+      any(p in s for p in ['D0.', 'D1.', 'S0.', 'S1.', 'S2.', 'SCC =', 'SCC ?', 'VCC', 'EXEC', 'tmp =', 'tmp[', 'lane =', 'PC =']) or
       any(p in s for p in ['D0[', 'D1[', 'S0[', 'S1[', 'S2[']) or
       s.startswith(('if ', 'else', 'elsif', 'endif', 'declare ', 'for ', 'endfor', '//')) or
       re.match(r'^[a-z_]+\s*=', s) or re.match(r'^[a-z_]+\[', s) or (depth > 0 and '=' in s)
@@ -1043,10 +1060,12 @@ from extra.assembly.amd.pcode import *
     is_div_scale = 'DIV_SCALE' in op.name
     # VOP3SD instructions that write VCC per-lane (either via VCC.u64[laneId] or by setting VCC = 0/1)
     has_sdst = cls_name == 'VOP3SDOp' and ('VCC.u64[laneId]' in pc or is_div_scale)
+    # Instructions that use/modify PC
+    has_pc = 'PC' in pc
 
     # Generate function with indented body
     fn_name = f"_{cls_name}_{op.name}"
-    lines.append(f"def {fn_name}(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):")
+    lines.append(f"def {fn_name}(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):")
     # Add original pseudocode as comment
     for pc_line in pc.split('\n'):
       lines.append(f"  # {pc_line}")
@@ -1057,14 +1076,21 @@ from extra.assembly.amd.pcode import *
             ('SCC', 'Reg(scc)'), ('VCC', 'Reg(vcc)'), ('EXEC', 'Reg(exec_mask)'),
             ('tmp', 'Reg(0)'), ('saveexec', 'Reg(exec_mask)'), ('laneId', 'lane'),
             ('SIMM16', 'Reg(literal)'), ('SIMM32', 'Reg(literal)'),
-            ('SRC0', 'Reg(src0_idx)'), ('VDST', 'Reg(vdst_idx)')]
+            ('SRC0', 'Reg(src0_idx)'), ('VDST', 'Reg(vdst_idx)'),
+            ('PC', 'Reg(pc)')]  # PC is passed in as byte address
     used = {name for name, _ in regs if name in combined}
     # EXEC_LO/EXEC_HI need EXEC
     if 'EXEC_LO' in combined or 'EXEC_HI' in combined: used.add('EXEC')
+    # VCCZ/EXECZ need VCC/EXEC
+    if 'VCCZ' in combined: used.add('VCC')
+    if 'EXECZ' in combined: used.add('EXEC')
     for name, init in regs:
       if name in used: lines.append(f"  {name} = {init}")
     if 'EXEC_LO' in combined: lines.append("  EXEC_LO = SliceProxy(EXEC, 31, 0)")
     if 'EXEC_HI' in combined: lines.append("  EXEC_HI = SliceProxy(EXEC, 63, 32)")
+    # VCCZ = 1 if VCC == 0, EXECZ = 1 if EXEC == 0
+    if 'VCCZ' in combined: lines.append("  VCCZ = Reg(1 if VCC._val == 0 else 0)")
+    if 'EXECZ' in combined: lines.append("  EXECZ = Reg(1 if EXEC._val == 0 else 0)")
     # Add compiled pseudocode with markers
     lines.append("  # --- compiled pseudocode ---")
     for line in code.split('\n'):
@@ -1088,6 +1114,11 @@ from extra.assembly.amd.pcode import *
       lines.append("    result['d0_64'] = True")
     if has_d1:
       lines.append("  result['d1'] = D1._val & 1")
+    if has_pc:
+      # Return new PC as absolute byte address, emulator will compute delta
+      # Handle negative values (backward jumps): PC._val is stored as unsigned, convert to signed
+      lines.append("  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000")
+      lines.append("  result['new_pc'] = _pc  # absolute byte address")
     lines.append("  return result")
     lines.append("")
diff --git a/extra/assembly/amd/test/test_emu.py b/extra/assembly/amd/test/test_emu.py
index 0c8b0c0517..e9055070ad 100644
--- a/extra/assembly/amd/test/test_emu.py
+++ b/extra/assembly/amd/test/test_emu.py
@@ -2590,6 +2590,30 @@ class TestNewPcodeHelpers(unittest.TestCase):
     # byte 3: sel=0x0C = 12 -> 0x00
     self.assertEqual(result, 0x00FFFFFF, f"Expected 0x00FFFFFF, got 0x{result:08x}")
 
+  def test_v_perm_b32_sign_extend(self):
+    """V_PERM_B32: Test sign extension selectors 8-11."""
+    # Combined = {S0, S1} where S1 is bytes 0-3, S0 is bytes 4-7
+    # s0 = 0x00008000 -> byte 5 (0x80) has sign bit set
+    # s1 = 0x80000080 -> bytes 0 (0x80) and 3 (0x80) have sign bits set, byte 1 (0x00) does not
+    # Combined = 0x00008000_80000080
+    # selector = 0x08090A0B -> sign of bytes 1,3,5,7
+    # byte 0: sel=0x0B -> sign of byte 7 (0x00) -> 0x00
+    # byte 1: sel=0x0A -> sign of byte 5 (0x80) -> 0xFF
+    # byte 2: sel=0x09 -> sign of byte 3 (0x80) -> 0xFF
+    # byte 3: sel=0x08 -> sign of byte 1 (0x00) -> 0x00
+    instructions = [
+      s_mov_b32(s[0], 0x00008000),
+      s_mov_b32(s[1], 0x80000080),
+      s_mov_b32(s[2], 0x08090A0B),
+      v_mov_b32_e32(v[0], s[0]),
+      v_mov_b32_e32(v[1], s[1]),
+      v_mov_b32_e32(v[2], s[2]),
+      v_perm_b32(v[3], v[0], v[1], v[2]),
+    ]
+    st = run_program(instructions, n_lanes=1)
+    result = st.vgpr[0][3]
+    self.assertEqual(result, 0x00FFFF00, f"Expected 0x00FFFF00, got 0x{result:08x}")
+
   def test_v_dot2_f32_bf16_basic(self):
     """V_DOT2_F32_BF16: Dot product of two bf16 pairs accumulated into f32."""
     from extra.assembly.amd.pcode import _ibf16
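[Editor's note] For reference, the byte-selector semantics that the two V_PERM_B32 tests above pin down, written as a standalone helper (the helper name and shape are ours, not part of the patch): selectors 0-7 pick a byte of {S0,S1}, 8-11 replicate the sign bit of bytes 1/3/5/7 (the case the removed emu.py fallback got wrong), 0xC yields 0x00, and 0xD-0xF yield 0xFF.

def byte_permute(s0: int, s1: int, sel: int) -> int:
  data = ((s0 & 0xffffffff) << 32) | (s1 & 0xffffffff)  # {S0,S1}: S1 supplies bytes 0-3, S0 bytes 4-7
  out = 0
  for i in range(4):  # one selector byte per result byte
    b = (sel >> (8 * i)) & 0xff
    if b <= 7: byte = (data >> (8 * b)) & 0xff  # 0-7: select a byte of {S0,S1}
    elif b <= 0xb: byte = 0xff if (data >> (16 * (b - 7) - 1)) & 1 else 0x00  # 8-11: sign of bytes 1,3,5,7
    elif b == 0xc: byte = 0x00  # 0xC: constant zero
    else: byte = 0xff  # 0xD-0xF: constant 0xFF
    out |= byte << (8 * i)
  return out

assert byte_permute(0x00008000, 0x80000080, 0x08090A0B) == 0x00FFFF00  # matches test_v_perm_b32_sign_extend
assert byte_permute(0, 0, 0x0C0D0E0F) == 0x00FFFFFF  # constant selectors: 0xC -> 0x00, 0xD-0xF -> 0xFF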