diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 657a85a077..ef2f8ce66b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -654,7 +654,7 @@ jobs: - name: Run process replay tests uses: ./.github/actions/process-replay - testrdna3: + testamdasm: name: AMD ASM IDE runs-on: ubuntu-24.04 timeout-minutes: 10 @@ -679,8 +679,23 @@ jobs: run: python -m pytest -n=auto extra/assembly/amd/ --durations 20 - name: Run RDNA3 emulator tests (AMD_LLVM=1) run: AMD_LLVM=1 python -m pytest -n=auto extra/assembly/amd/ --durations 20 - - name: Install pdfplumber - run: pip install pdfplumber + - name: Run RDNA3 dtype tests + run: PYTHONPATH="." AMD=1 PYTHON_REMU=1 MOCKGPU=1 AMD_LLVM=0 pytest -n=auto test/test_dtype_alu.py test/test_dtype.py + - name: Run RDNA3 dtype tests (AMD_LLVM=1) + run: PYTHONPATH="." AMD=1 PYTHON_REMU=1 MOCKGPU=1 AMD_LLVM=1 pytest -n=auto test/test_dtype_alu.py test/test_dtype.py + + testamdautogen: + name: AMD autogen + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: rdna3-autogen + pydeps: "pdfplumber" - name: Verify AMD autogen is up to date run: | python -m extra.assembly.amd.dsl --arch all diff --git a/extra/assembly/amd/autogen/cdna/gen_pcode.py b/extra/assembly/amd/autogen/cdna/gen_pcode.py index cb2f3e8f06..b5cd12abc0 100644 --- a/extra/assembly/amd/autogen/cdna/gen_pcode.py +++ b/extra/assembly/amd/autogen/cdna/gen_pcode.py @@ -18284,6 +18284,37 @@ def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result +def _VOP3AOp_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # shift = 32'I(S1[4 : 0].u32) * 53; + # if exponent(S0.f64) > 1077 then + # shift += exponent(S0.f64) - 1077 + # endif; + # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} + # // b_1200 is the MSB of the fractional part of 2.0/PI + # // Left shift operation indicates which bits are brought + # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); + # scale = -53 - shift; + # if exponent(S0.f64) >= 1968 then + # scale += 128 + # endif; + # D0.f64 = ldexp(result, scale) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + shift = (S1[4 : 0].u32) * 53 + if exponent(S0.f64) > 1077: + shift += exponent(S0.f64) - 1077 + result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff) + scale = -53 - shift + if exponent(S0.f64) >= 1968: + scale += 128 + D0.f64 = ldexp(result, scale) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) @@ -18940,6 +18971,7 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_LSHLREV_B64: _VOP3AOp_V_LSHLREV_B64, VOP3AOp.V_LSHRREV_B64: _VOP3AOp_V_LSHRREV_B64, VOP3AOp.V_ASHRREV_I64: _VOP3AOp_V_ASHRREV_I64, + VOP3AOp.V_TRIG_PREOP_F64: _VOP3AOp_V_TRIG_PREOP_F64, VOP3AOp.V_BFM_B32: _VOP3AOp_V_BFM_B32, VOP3AOp.V_CVT_PKNORM_I16_F32: _VOP3AOp_V_CVT_PKNORM_I16_F32, VOP3AOp.V_CVT_PKNORM_U16_F32: _VOP3AOp_V_CVT_PKNORM_U16_F32, diff --git a/extra/assembly/amd/autogen/rdna3/gen_pcode.py b/extra/assembly/amd/autogen/rdna3/gen_pcode.py index df32416e22..e480cbb93e 100644 --- a/extra/assembly/amd/autogen/rdna3/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna3/gen_pcode.py @@ -5497,6 +5497,7 @@ def _VOP3Op_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5517,6 +5518,7 @@ def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5537,6 +5539,7 @@ def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5556,6 +5559,7 @@ def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5576,6 +5580,7 @@ def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5595,6 +5600,7 @@ def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5614,6 +5620,7 @@ def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5634,6 +5641,7 @@ def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5654,6 +5662,7 @@ def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5674,6 +5683,7 @@ def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5694,6 +5704,7 @@ def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5715,6 +5726,7 @@ def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5735,6 +5747,7 @@ def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5756,6 +5769,7 @@ def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5777,6 +5791,7 @@ def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5795,6 +5810,7 @@ def _VOP3Op_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5813,6 +5829,7 @@ def _VOP3Op_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5833,6 +5850,7 @@ def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5853,6 +5871,7 @@ def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5872,6 +5891,7 @@ def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5892,6 +5912,7 @@ def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5911,6 +5932,7 @@ def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5930,6 +5952,7 @@ def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5950,6 +5973,7 @@ def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5970,6 +5994,7 @@ def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5990,6 +6015,7 @@ def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6010,6 +6036,7 @@ def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6031,6 +6058,7 @@ def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6051,6 +6079,7 @@ def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6072,6 +6101,7 @@ def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6093,6 +6123,7 @@ def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6111,6 +6142,7 @@ def _VOP3Op_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6129,6 +6161,7 @@ def _VOP3Op_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6149,6 +6182,7 @@ def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6169,6 +6203,7 @@ def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6188,6 +6223,7 @@ def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6208,6 +6244,7 @@ def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6227,6 +6264,7 @@ def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6246,6 +6284,7 @@ def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6266,6 +6305,7 @@ def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6286,6 +6326,7 @@ def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6306,6 +6347,7 @@ def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6326,6 +6368,7 @@ def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6347,6 +6390,7 @@ def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6367,6 +6411,7 @@ def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6388,6 +6433,7 @@ def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6409,6 +6455,7 @@ def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6427,6 +6474,7 @@ def _VOP3Op_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6447,6 +6495,7 @@ def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6467,6 +6516,7 @@ def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6486,6 +6536,7 @@ def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6506,6 +6557,7 @@ def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6526,6 +6578,7 @@ def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6545,6 +6598,7 @@ def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6565,6 +6619,7 @@ def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6585,6 +6640,7 @@ def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6604,6 +6660,7 @@ def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6624,6 +6681,7 @@ def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6644,6 +6702,7 @@ def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6663,6 +6722,7 @@ def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6681,6 +6741,7 @@ def _VOP3Op_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6701,6 +6762,7 @@ def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6721,6 +6783,7 @@ def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6740,6 +6803,7 @@ def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6760,6 +6824,7 @@ def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6780,6 +6845,7 @@ def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6799,6 +6865,7 @@ def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6817,6 +6884,7 @@ def _VOP3Op_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6835,6 +6903,7 @@ def _VOP3Op_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6855,6 +6924,7 @@ def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6875,6 +6945,7 @@ def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6894,6 +6965,7 @@ def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6914,6 +6986,7 @@ def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6934,6 +7007,7 @@ def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6953,6 +7027,7 @@ def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6971,6 +7046,7 @@ def _VOP3Op_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6989,6 +7065,7 @@ def _VOP3Op_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7009,6 +7086,7 @@ def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7029,6 +7107,7 @@ def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7048,6 +7127,7 @@ def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7068,6 +7148,7 @@ def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7088,6 +7169,7 @@ def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7107,6 +7189,7 @@ def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7125,6 +7208,7 @@ def _VOP3Op_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7143,6 +7227,7 @@ def _VOP3Op_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7163,6 +7248,7 @@ def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7183,6 +7269,7 @@ def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7202,6 +7289,7 @@ def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7222,6 +7310,7 @@ def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7242,6 +7331,7 @@ def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7261,6 +7351,7 @@ def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7279,6 +7370,7 @@ def _VOP3Op_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7339,6 +7431,7 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7399,6 +7492,7 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7459,6 +7553,7 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7472,7 +7567,7 @@ def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7485,7 +7580,7 @@ def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7499,7 +7594,7 @@ def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7512,7 +7607,7 @@ def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7525,7 +7620,7 @@ def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7538,7 +7633,7 @@ def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7551,7 +7646,7 @@ def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7564,7 +7659,7 @@ def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7577,7 +7672,7 @@ def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7591,7 +7686,7 @@ def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7605,7 +7700,7 @@ def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7619,7 +7714,7 @@ def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7633,7 +7728,7 @@ def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7647,7 +7742,7 @@ def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7661,7 +7756,7 @@ def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7672,7 +7767,7 @@ def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7683,7 +7778,7 @@ def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7696,7 +7791,7 @@ def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7710,7 +7805,7 @@ def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7723,7 +7818,7 @@ def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7736,7 +7831,7 @@ def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7749,7 +7844,7 @@ def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7762,7 +7857,7 @@ def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7775,7 +7870,7 @@ def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7788,7 +7883,7 @@ def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7802,7 +7897,7 @@ def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7816,7 +7911,7 @@ def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7830,7 +7925,7 @@ def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7844,7 +7939,7 @@ def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7858,7 +7953,7 @@ def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7872,7 +7967,7 @@ def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7883,7 +7978,7 @@ def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7894,7 +7989,7 @@ def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7907,7 +8002,7 @@ def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7921,7 +8016,7 @@ def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7934,7 +8029,7 @@ def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7947,7 +8042,7 @@ def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7960,7 +8055,7 @@ def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7973,7 +8068,7 @@ def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7986,7 +8081,7 @@ def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7999,7 +8094,7 @@ def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8013,7 +8108,7 @@ def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8027,7 +8122,7 @@ def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8041,7 +8136,7 @@ def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8055,7 +8150,7 @@ def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8069,7 +8164,7 @@ def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8083,7 +8178,7 @@ def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8094,7 +8189,7 @@ def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8107,7 +8202,7 @@ def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8121,7 +8216,7 @@ def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8134,7 +8229,7 @@ def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8147,7 +8242,7 @@ def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8160,7 +8255,7 @@ def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8173,7 +8268,7 @@ def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8186,7 +8281,7 @@ def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8200,7 +8295,7 @@ def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8213,7 +8308,7 @@ def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8226,7 +8321,7 @@ def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8239,7 +8334,7 @@ def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8252,7 +8347,7 @@ def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8263,7 +8358,7 @@ def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8276,7 +8371,7 @@ def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8290,7 +8385,7 @@ def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8303,7 +8398,7 @@ def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8316,7 +8411,7 @@ def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8329,7 +8424,7 @@ def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8342,7 +8437,7 @@ def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8353,7 +8448,7 @@ def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8364,7 +8459,7 @@ def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8377,7 +8472,7 @@ def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8391,7 +8486,7 @@ def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8404,7 +8499,7 @@ def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8417,7 +8512,7 @@ def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8430,7 +8525,7 @@ def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8443,7 +8538,7 @@ def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8454,7 +8549,7 @@ def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8465,7 +8560,7 @@ def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8478,7 +8573,7 @@ def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8492,7 +8587,7 @@ def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8505,7 +8600,7 @@ def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8518,7 +8613,7 @@ def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8531,7 +8626,7 @@ def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8544,7 +8639,7 @@ def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8555,7 +8650,7 @@ def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8566,7 +8661,7 @@ def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8579,7 +8674,7 @@ def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8593,7 +8688,7 @@ def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8606,7 +8701,7 @@ def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8619,7 +8714,7 @@ def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8632,7 +8727,7 @@ def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8645,7 +8740,7 @@ def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8656,7 +8751,7 @@ def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8709,7 +8804,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8762,7 +8857,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8815,7 +8910,7 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -12162,6 +12257,37 @@ def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result +def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # shift = 32'I(S1[4 : 0].u32) * 53; + # if exponent(S0.f64) > 1077 then + # shift += exponent(S0.f64) - 1077 + # endif; + # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} + # // b_1200 is the MSB of the fractional part of 2.0/PI + # // Left shift operation indicates which bits are brought + # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); + # scale = -53 - shift; + # if exponent(S0.f64) >= 1968 then + # scale += 128 + # endif; + # D0.f64 = ldexp(result, scale) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + shift = (S1[4 : 0].u32) * 53 + if exponent(S0.f64) > 1077: + shift += exponent(S0.f64) - 1077 + result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff) + scale = -53 - shift + if exponent(S0.f64) >= 1968: + scale += 128 + D0.f64 = ldexp(result, scale) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) @@ -12695,6 +12821,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MUL_LO_U32: _VOP3Op_V_MUL_LO_U32, VOP3Op.V_MUL_HI_U32: _VOP3Op_V_MUL_HI_U32, VOP3Op.V_MUL_HI_I32: _VOP3Op_V_MUL_HI_I32, + VOP3Op.V_TRIG_PREOP_F64: _VOP3Op_V_TRIG_PREOP_F64, VOP3Op.V_LSHLREV_B16: _VOP3Op_V_LSHLREV_B16, VOP3Op.V_LSHRREV_B16: _VOP3Op_V_LSHRREV_B16, VOP3Op.V_ASHRREV_I16: _VOP3Op_V_ASHRREV_I16, diff --git a/extra/assembly/amd/autogen/rdna4/gen_pcode.py b/extra/assembly/amd/autogen/rdna4/gen_pcode.py index 15a92ee453..70dd62eca1 100644 --- a/extra/assembly/amd/autogen/rdna4/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna4/gen_pcode.py @@ -5575,6 +5575,7 @@ def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5595,6 +5596,7 @@ def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5614,6 +5616,7 @@ def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5634,6 +5637,7 @@ def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5653,6 +5657,7 @@ def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5672,6 +5677,7 @@ def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5692,6 +5698,7 @@ def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5712,6 +5719,7 @@ def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5732,6 +5740,7 @@ def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5752,6 +5761,7 @@ def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5773,6 +5783,7 @@ def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5793,6 +5804,7 @@ def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5814,6 +5826,7 @@ def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5835,6 +5848,7 @@ def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5855,6 +5869,7 @@ def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5875,6 +5890,7 @@ def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5894,6 +5910,7 @@ def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5914,6 +5931,7 @@ def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5933,6 +5951,7 @@ def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5952,6 +5971,7 @@ def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5972,6 +5992,7 @@ def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5992,6 +6013,7 @@ def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6012,6 +6034,7 @@ def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6032,6 +6055,7 @@ def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6053,6 +6077,7 @@ def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6073,6 +6098,7 @@ def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6094,6 +6120,7 @@ def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6115,6 +6142,7 @@ def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6135,6 +6163,7 @@ def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6155,6 +6184,7 @@ def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6174,6 +6204,7 @@ def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6194,6 +6225,7 @@ def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6213,6 +6245,7 @@ def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6232,6 +6265,7 @@ def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6252,6 +6286,7 @@ def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6272,6 +6307,7 @@ def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6292,6 +6328,7 @@ def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6312,6 +6349,7 @@ def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6333,6 +6371,7 @@ def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6353,6 +6392,7 @@ def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6374,6 +6414,7 @@ def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6395,6 +6436,7 @@ def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6415,6 +6457,7 @@ def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6435,6 +6478,7 @@ def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6454,6 +6498,7 @@ def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6474,6 +6519,7 @@ def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6494,6 +6540,7 @@ def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6513,6 +6560,7 @@ def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6533,6 +6581,7 @@ def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6553,6 +6602,7 @@ def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6572,6 +6622,7 @@ def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6592,6 +6643,7 @@ def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6612,6 +6664,7 @@ def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6631,6 +6684,7 @@ def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6651,6 +6705,7 @@ def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6671,6 +6726,7 @@ def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6690,6 +6746,7 @@ def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6710,6 +6767,7 @@ def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6730,6 +6788,7 @@ def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6749,6 +6808,7 @@ def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6769,6 +6829,7 @@ def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6789,6 +6850,7 @@ def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6808,6 +6870,7 @@ def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6828,6 +6891,7 @@ def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6848,6 +6912,7 @@ def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6867,6 +6932,7 @@ def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6887,6 +6953,7 @@ def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6907,6 +6974,7 @@ def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6926,6 +6994,7 @@ def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6946,6 +7015,7 @@ def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6966,6 +7036,7 @@ def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6985,6 +7056,7 @@ def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7005,6 +7077,7 @@ def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7025,6 +7098,7 @@ def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7044,6 +7118,7 @@ def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7064,6 +7139,7 @@ def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7084,6 +7160,7 @@ def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7103,6 +7180,7 @@ def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7163,6 +7241,7 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7223,6 +7302,7 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7283,6 +7363,7 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7298,7 +7379,7 @@ def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7312,7 +7393,7 @@ def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7325,7 +7406,7 @@ def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7338,7 +7419,7 @@ def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7351,7 +7432,7 @@ def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7364,7 +7445,7 @@ def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7377,7 +7458,7 @@ def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7390,7 +7471,7 @@ def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7404,7 +7485,7 @@ def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7418,7 +7499,7 @@ def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7432,7 +7513,7 @@ def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7446,7 +7527,7 @@ def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7460,7 +7541,7 @@ def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7474,7 +7555,7 @@ def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7487,7 +7568,7 @@ def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7501,7 +7582,7 @@ def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7514,7 +7595,7 @@ def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7527,7 +7608,7 @@ def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7540,7 +7621,7 @@ def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7553,7 +7634,7 @@ def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7566,7 +7647,7 @@ def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7579,7 +7660,7 @@ def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7593,7 +7674,7 @@ def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7607,7 +7688,7 @@ def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7621,7 +7702,7 @@ def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7635,7 +7716,7 @@ def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7649,7 +7730,7 @@ def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7663,7 +7744,7 @@ def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7676,7 +7757,7 @@ def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7690,7 +7771,7 @@ def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7703,7 +7784,7 @@ def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7716,7 +7797,7 @@ def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7729,7 +7810,7 @@ def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7742,7 +7823,7 @@ def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7755,7 +7836,7 @@ def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7768,7 +7849,7 @@ def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7782,7 +7863,7 @@ def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7796,7 +7877,7 @@ def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7810,7 +7891,7 @@ def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7824,7 +7905,7 @@ def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7838,7 +7919,7 @@ def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7852,7 +7933,7 @@ def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7865,7 +7946,7 @@ def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7879,7 +7960,7 @@ def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7892,7 +7973,7 @@ def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7905,7 +7986,7 @@ def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7918,7 +7999,7 @@ def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7931,7 +8012,7 @@ def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7944,7 +8025,7 @@ def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7958,7 +8039,7 @@ def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7971,7 +8052,7 @@ def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7984,7 +8065,7 @@ def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7997,7 +8078,7 @@ def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8010,7 +8091,7 @@ def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8023,7 +8104,7 @@ def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8037,7 +8118,7 @@ def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8050,7 +8131,7 @@ def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8063,7 +8144,7 @@ def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8076,7 +8157,7 @@ def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8089,7 +8170,7 @@ def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8102,7 +8183,7 @@ def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8116,7 +8197,7 @@ def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8129,7 +8210,7 @@ def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8142,7 +8223,7 @@ def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8155,7 +8236,7 @@ def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8168,7 +8249,7 @@ def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8181,7 +8262,7 @@ def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8195,7 +8276,7 @@ def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8208,7 +8289,7 @@ def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8221,7 +8302,7 @@ def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8234,7 +8315,7 @@ def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8247,7 +8328,7 @@ def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8260,7 +8341,7 @@ def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8274,7 +8355,7 @@ def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8287,7 +8368,7 @@ def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8300,7 +8381,7 @@ def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8313,7 +8394,7 @@ def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8326,7 +8407,7 @@ def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8379,7 +8460,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8432,7 +8513,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8485,7 +8566,7 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -11985,6 +12066,37 @@ def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result +def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # shift = 32'I(S1[4 : 0].u32) * 53; + # if exponent(S0.f64) > 1077 then + # shift += exponent(S0.f64) - 1077 + # endif; + # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} + # // b_1200 is the MSB of the fractional part of 2.0/PI + # // Left shift operation indicates which bits are brought + # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); + # scale = -53 - shift; + # if exponent(S0.f64) >= 1968 then + # scale += 128 + # endif; + # D0.f64 = ldexp(result, scale) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + shift = (S1[4 : 0].u32) * 53 + if exponent(S0.f64) > 1077: + shift += exponent(S0.f64) - 1077 + result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff) + scale = -53 - shift + if exponent(S0.f64) >= 1968: + scale += 128 + D0.f64 = ldexp(result, scale) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) @@ -12748,6 +12860,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MUL_LO_U32: _VOP3Op_V_MUL_LO_U32, VOP3Op.V_MUL_HI_U32: _VOP3Op_V_MUL_HI_U32, VOP3Op.V_MUL_HI_I32: _VOP3Op_V_MUL_HI_I32, + VOP3Op.V_TRIG_PREOP_F64: _VOP3Op_V_TRIG_PREOP_F64, VOP3Op.V_LSHLREV_B16: _VOP3Op_V_LSHLREV_B16, VOP3Op.V_LSHRREV_B16: _VOP3Op_V_LSHRREV_B16, VOP3Op.V_ASHRREV_I16: _VOP3Op_V_ASHRREV_I16, diff --git a/extra/assembly/amd/dsl.py b/extra/assembly/amd/dsl.py index 615597e81b..ef2f3aefdc 100644 --- a/extra/assembly/amd/dsl.py +++ b/extra/assembly/amd/dsl.py @@ -283,6 +283,10 @@ class Inst: from extra.assembly.amd.autogen.rdna3 import VOP3Op try: op_name = VOP3Op(op).name except ValueError: pass + if op_name is None and self.__class__.__name__ == 'VOPC': + from extra.assembly.amd.autogen.rdna3 import VOPCOp + try: op_name = VOPCOp(op).name + except ValueError: pass if op_name is None: return False # V_LDEXP_F64 has 32-bit integer exponent in src1, so literal is 32-bit if op_name == 'V_LDEXP_F64': return False diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py index 7e9dbd014b..c99720aceb 100644 --- a/extra/assembly/amd/emu.py +++ b/extra/assembly/amd/emu.py @@ -17,6 +17,7 @@ VCC_LO, VCC_HI, NULL, EXEC_LO, EXEC_HI, SCC = SrcEnum.VCC_LO, SrcEnum.VCC_HI, Sr # VOP3 ops that use 64-bit operands (and thus 64-bit literals when src is 255) # Exception: V_LDEXP_F64 has 32-bit integer src1, so literal should NOT be 64-bit when src1=255 _VOP3_64BIT_OPS = {op.value for op in VOP3Op if op.name.endswith(('_F64', '_B64', '_I64', '_U64'))} +_VOPC_64BIT_OPS = {op.value for op in VOPCOp if op.name.endswith(('_F64', '_B64', '_I64', '_U64'))} # Ops where src1 is 32-bit (exponent/shift amount) even though the op name suggests 64-bit _VOP3_64BIT_OPS_32BIT_SRC1 = {VOP3Op.V_LDEXP_F64.value} # Ops with 16-bit types in name (for source/dest handling) @@ -185,7 +186,7 @@ def decode_program(data: bytes) -> Program: # Exception: some ops have mixed src sizes (e.g., V_LDEXP_F64 has 32-bit src1) op_val = inst._values.get('op') if hasattr(op_val, 'value'): op_val = op_val.value - is_64bit = inst_class is VOP3 and op_val in _VOP3_64BIT_OPS + is_64bit = (inst_class is VOP3 and op_val in _VOP3_64BIT_OPS) or (inst_class is VOPC and op_val in _VOPC_64BIT_OPS) # Don't treat literal as 64-bit if the op has 32-bit src1 and src1 is the literal if is_64bit and op_val in _VOP3_64BIT_OPS_32BIT_SRC1 and getattr(inst, 'src1', None) == 255: is_64bit = False @@ -336,14 +337,22 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No op = VOP3SDOp(inst.op) fn = compiled.get(VOP3SDOp, {}).get(op) if fn is None: raise NotImplementedError(f"{op.name} not in pseudocode") - s0, s1, s2 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane), st.rsrc(inst.src2, lane) - # For 64-bit src2 ops (V_MAD_U64_U32, V_MAD_I64_I32), read from consecutive registers + # VOP3SD has both 32-bit ops (V_ADD_CO_CI_U32, etc.) and 64-bit ops (V_DIV_SCALE_F64, V_MAD_U64_U32, etc.) + div_scale_64_ops = (VOP3SDOp.V_DIV_SCALE_F64,) mad64_ops = (VOP3SDOp.V_MAD_U64_U32, VOP3SDOp.V_MAD_I64_I32) - if op in mad64_ops: + if op in div_scale_64_ops: + # V_DIV_SCALE_F64: all sources are 64-bit + s0, s1, s2 = st.rsrc64(inst.src0, lane), st.rsrc64(inst.src1, lane), st.rsrc64(inst.src2, lane) + elif op in mad64_ops: + # V_MAD_U64_U32, V_MAD_I64_I32: src0/src1 are 32-bit, src2 is 64-bit + s0, s1 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane) if inst.src2 >= 256: # VGPR s2 = V[inst.src2 - 256] | (V[inst.src2 - 256 + 1] << 32) else: # SGPR - read 64-bit from consecutive SGPRs s2 = st.rsgpr64(inst.src2) + else: + # Default: 32-bit sources + s0, s1, s2 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane), st.rsrc(inst.src2, lane) d0 = V[inst.vdst] # For carry-in operations (V_*_CO_CI_*), src2 register contains the carry bitmask (not VCC). # The pseudocode uses VCC but in VOP3SD encoding, the actual carry source is inst.src2. @@ -516,8 +525,9 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No # For 64-bit shift ops: src0 is 32-bit (shift amount), src1 is 64-bit (value to shift) # For most other _B64/_I64/_U64/_F64 ops: all sources are 64-bit is_64bit_op = op.name.endswith(('_B64', '_I64', '_U64', '_F64')) - # V_LDEXP_F64: src0 is 64-bit float, src1 is 32-bit integer exponent - is_ldexp_64 = op in (VOP3Op.V_LDEXP_F64,) + # V_LDEXP_F64, V_TRIG_PREOP_F64, V_CMP_CLASS_F64, V_CMPX_CLASS_F64: src0 is 64-bit, src1 is 32-bit + is_ldexp_64 = op in (VOP3Op.V_LDEXP_F64, VOP3Op.V_TRIG_PREOP_F64, VOP3Op.V_CMP_CLASS_F64, VOP3Op.V_CMPX_CLASS_F64, + VOPCOp.V_CMP_CLASS_F64, VOPCOp.V_CMPX_CLASS_F64) is_shift_64 = op in (VOP3Op.V_LSHLREV_B64, VOP3Op.V_LSHRREV_B64, VOP3Op.V_ASHRREV_I64) # 16-bit source ops: use precomputed sets instead of string checks # Note: must check op_cls to avoid cross-enum value collisions @@ -531,7 +541,12 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 elif is_ldexp_64: s0 = mod_src64(st.rsrc64(src0, lane), 0) # mantissa is 64-bit float - s1 = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 # exponent is 32-bit int + # src1 is 32-bit int. For 64-bit ops (like V_CMP_CLASS_F64), the literal is stored shifted left by 32. + # For V_LDEXP_F64/V_TRIG_PREOP_F64, _is_64bit_op() returns False so literal is stored as-is. + s1_raw = st.rsrc(src1, lane) if src1 is not None else 0 + # Only shift if src1 is literal AND this is a true 64-bit op (V_CMP_CLASS ops, not LDEXP/TRIG_PREOP) + is_class_op = op in (VOP3Op.V_CMP_CLASS_F64, VOP3Op.V_CMPX_CLASS_F64, VOPCOp.V_CMP_CLASS_F64, VOPCOp.V_CMPX_CLASS_F64) + s1 = mod_src((s1_raw >> 32) if src1 == 255 and is_class_op else s1_raw, 1) s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 elif is_64bit_op: # 64-bit ops: apply neg/abs modifiers using f64 interpretation for float ops @@ -651,7 +666,7 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No is_16bit_dst = (op_cls is VOP3Op and op in _VOP3_16BIT_DST_OPS) or (op_cls is VOP1Op and op in _VOP1_16BIT_DST_OPS) if writes_to_sgpr: st.wsgpr(vdst, result['d0'] & 0xffffffff) - elif result.get('d0_64') or is_64bit_op: + elif result.get('d0_64'): V[vdst] = result['d0'] & 0xffffffff V[vdst + 1] = (result['d0'] >> 32) & 0xffffffff elif is_16bit_dst and inst_type is VOP3: diff --git a/extra/assembly/amd/pcode.py b/extra/assembly/amd/pcode.py index c230384324..6cef8ff2e8 100644 --- a/extra/assembly/amd/pcode.py +++ b/extra/assembly/amd/pcode.py @@ -280,7 +280,7 @@ def f32_to_u8(f): return max(0, min(255, int(f))) if not math.isnan(f) else 0 def mantissa(f): if f == 0.0 or math.isinf(f) or math.isnan(f): return f m, _ = math.frexp(f) - return math.copysign(m * 2.0, f) + return m # AMD V_FREXP_MANT returns mantissa in [0.5, 1.0) range def signext_from_bit(val, bit): bit = int(bit) if bit == 0: return 0 @@ -301,6 +301,7 @@ __all__ = [ # Constants 'WAVE32', 'WAVE64', 'MASK32', 'MASK64', 'WAVE_MODE', 'DENORM', 'OVERFLOW_F32', 'UNDERFLOW_F32', 'OVERFLOW_F64', 'UNDERFLOW_F64', 'MAX_FLOAT_F32', 'ROUND_MODE', 'cvtToQuietNAN', 'DST', 'INF', 'PI', + 'TWO_OVER_PI_1201', # Aliases for pseudocode 's_ff1_i32_b32', 's_ff1_i32_b64', 'GT_NEG_ZERO', 'LT_NEG_ZERO', 'isNAN', 'isQuietNAN', 'isSignalNAN', 'fma', 'ldexp', 'sign', 'exponent', 'F', 'signext', @@ -359,12 +360,14 @@ class _Inf: f16 = f32 = f64 = float('inf') def __neg__(self): return _NegInf() def __pos__(self): return self + def __float__(self): return float('inf') def __eq__(self, other): return float(other) == float('inf') if not isinstance(other, _NegInf) else False def __req__(self, other): return self.__eq__(other) class _NegInf: f16 = f32 = f64 = float('-inf') def __neg__(self): return _Inf() def __pos__(self): return self + def __float__(self): return float('-inf') def __eq__(self, other): return float(other) == float('-inf') if not isinstance(other, _Inf) else False def __req__(self, other): return self.__eq__(other) INF = _Inf() @@ -380,6 +383,31 @@ DST = None # Placeholder, will be set in context MASK32, MASK64 = 0xffffffff, 0xffffffffffffffff +# 2/PI with 1201 bits of precision for V_TRIG_PREOP_F64 +# Computed as: int((2/pi) * 2^1201) - this is the fractional part of 2/pi scaled to integer +# The MSB (bit 1200) corresponds to 2^0 position in the fraction 0.b1200 b1199 ... b1 b0 +_TWO_OVER_PI_1201_RAW = 0x0145f306dc9c882a53f84eafa3ea69bb81b6c52b3278872083fca2c757bd778ac36e48dc74849ba5c00c925dd413a32439fc3bd63962534e7dd1046bea5d768909d338e04d68befc827323ac7306a673e93908bf177bf250763ff12fffbc0b301fde5e2316b414da3eda6cfd9e4f96136e9e8c7ecd3cbfd45aea4f758fd7cbe2f67a0e73ef14a525d4d7f6bf623f1aba10ac06608df8f6 + +class _BigInt: + """Wrapper for large integers that supports bit slicing [high:low].""" + __slots__ = ('_val',) + def __init__(self, val): self._val = val + def __getitem__(self, key): + if isinstance(key, slice): + high, low = key.start, key.stop + if high < low: high, low = low, high # Handle reversed slice + mask = (1 << (high - low + 1)) - 1 + return (self._val >> low) & mask + return (self._val >> key) & 1 + def __int__(self): return self._val + def __index__(self): return self._val + def __lshift__(self, n): return self._val << int(n) + def __rshift__(self, n): return self._val >> int(n) + def __and__(self, n): return self._val & int(n) + def __or__(self, n): return self._val | int(n) + +TWO_OVER_PI_1201 = _BigInt(_TWO_OVER_PI_1201_RAW) + class _WaveMode: IEEE = False WAVE_MODE = _WaveMode() @@ -693,6 +721,9 @@ def _expr(e: str) -> str: return f'_pack({hi}, {lo})' e = re.sub(r'\{\s*([^,{}]+)\s*,\s*([^,{}]+)\s*\}', pack, e) + # Special constant: 1201'B(2.0 / PI) -> TWO_OVER_PI_1201 (precomputed 1201-bit 2/pi) + e = re.sub(r"1201'B\(2\.0\s*/\s*PI\)", "TWO_OVER_PI_1201", e) + # Literals: 1'0U -> 0, 32'I(x) -> (x), B(x) -> (x) e = re.sub(r"\d+'([0-9a-fA-Fx]+)[UuFf]*", r'\1', e) e = re.sub(r"\d+'[FIBU]\(", "(", e) @@ -815,7 +846,7 @@ INST_PATTERN = re.compile(r'^([SV]_[A-Z0-9_]+)\s+(\d+)\s*$', re.M) UNSUPPORTED = ['SGPR[', 'V_SWAP', 'eval ', 'FATAL_HALT', 'HW_REGISTERS', 'vscnt', 'vmcnt', 'expcnt', 'lgkmcnt', 'CVT_OFF_TABLE', 'ThreadMask', - 'S1[i', 'C.i32', 'S[i]', 'in[', '2.0 / PI', + 'S1[i', 'C.i32', 'S[i]', 'in[', 'if n.', 'DST.u32', 'addrd = DST', 'addr = DST'] # Malformed pseudocode from PDF def extract_pseudocode(text: str) -> str | None: @@ -1050,12 +1081,22 @@ from extra.assembly.amd.pcode import * code = code.replace( 'D0.f64 = ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64)))', 'D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64)))') + # V_TRIG_PREOP_F64: AMD pseudocode uses (x << shift) & mask but mask needs to extract TOP bits. + # The PDF shows: result = 64'F((1201'B(2.0/PI)[1200:0] << shift) & 1201'0x1fffffffffffff) + # Issues to fix: + # 1. After left shift, the interesting bits are at the top, not bottom - need >> (1201-53) + # 2. shift.u32 fails because shift is a plain int after * 53 - use int(shift) + # 3. 64'F(...) means convert int to float (not interpret as bit pattern) - use float() + if op.name == 'V_TRIG_PREOP_F64': + code = code.replace( + 'result = F((TWO_OVER_PI_1201[1200 : 0] << shift.u32) & 0x1fffffffffffff)', + 'result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff)') # Detect flags for result handling is_64 = any(p in pc for p in ['D0.u64', 'D0.b64', 'D0.f64', 'D0.i64', 'D1.u64', 'D1.b64', 'D1.f64', 'D1.i64']) has_d1 = '{ D1' in pc if has_d1: is_64 = True - is_cmp = cls_name == 'VOPCOp' and 'D0.u64[laneId]' in pc - is_cmpx = cls_name == 'VOPCOp' and 'EXEC.u64[laneId]' in pc # V_CMPX writes to EXEC per-lane + is_cmp = (cls_name == 'VOPCOp' or cls_name == 'VOP3Op') and 'D0.u64[laneId]' in pc + is_cmpx = (cls_name == 'VOPCOp' or cls_name == 'VOP3Op') and 'EXEC.u64[laneId]' in pc # V_CMPX writes to EXEC per-lane # V_DIV_SCALE passes through S0 if no branch taken is_div_scale = 'DIV_SCALE' in op.name # VOP3SD instructions that write VCC per-lane (either via VCC.u64[laneId] or by setting VCC = 0/1) diff --git a/extra/assembly/amd/test/test_emu.py b/extra/assembly/amd/test/test_emu.py index e9055070ad..3761ddf356 100644 --- a/extra/assembly/amd/test/test_emu.py +++ b/extra/assembly/amd/test/test_emu.py @@ -2454,6 +2454,82 @@ class TestF64Conversions(unittest.TestCase): result = struct.unpack('> 32), + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[2], 0xDEADBEEF), # Canary value + v_mov_b32_e32(v[3], s[2]), # Put canary in v3 + v_cvt_i32_f64_e32(v[2], v[0:2]), # Convert -1.0 -> -1 (0xffffffff) + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] + canary = st.vgpr[0][3] + # V_CVT_I32_F64 of -1.0 should produce 0xffffffff (-1) + self.assertEqual(result, 0xffffffff, f"Expected 0xffffffff (-1), got 0x{result:08x}") + # v3 should still contain the canary (not clobbered by 64-bit write) + self.assertEqual(canary, 0xDEADBEEF, f"v3 canary should be 0xDEADBEEF, got 0x{canary:08x} (clobbered!)") + + def test_v_frexp_mant_f64_range(self): + """V_FREXP_MANT_F64 should return mantissa in [0.5, 1.0) range. + + Regression test: The mantissa() helper was incorrectly multiplying by 2.0, + returning values in [1.0, 2.0) instead of the correct [0.5, 1.0) range. + """ + # Test with 2.0: frexp(2.0) should give mantissa=0.5, exponent=2 + two_f64 = f2i64(2.0) + instructions = [ + s_mov_b32(s[0], two_f64 & 0xffffffff), + s_mov_b32(s[1], two_f64 >> 32), + v_frexp_mant_f64_e32(v[0:2], s[0:2]), + v_frexp_exp_i32_f64_e32(v[2], s[0:2]), + ] + st = run_program(instructions, n_lanes=1) + mant = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + exp = st.vgpr[0][2] + if exp >= 0x80000000: exp -= 0x100000000 # sign extend + # frexp(2.0) = 0.5 * 2^2 + self.assertAlmostEqual(mant, 0.5, places=10, msg=f"Expected mantissa 0.5, got {mant}") + self.assertEqual(exp, 2, f"Expected exponent 2, got {exp}") + + def test_v_div_scale_f64_reads_64bit_sources(self): + """V_DIV_SCALE_F64 must read all sources as 64-bit values. + + Regression test: VOP3SD was reading sources as 32-bit for V_DIV_SCALE_F64, + causing incorrect results when the low 32 bits happened to look like 0 or denorm. + """ + # Set up v0:v1 = sqrt(2) ≈ 1.414, v2:v3 = 1.0 + sqrt2_f64 = f2i64(1.4142135623730951) + one_f64 = f2i64(1.0) + instructions = [ + s_mov_b32(s[0], sqrt2_f64 & 0xffffffff), + s_mov_b32(s[1], sqrt2_f64 >> 32), + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[2], one_f64 & 0xffffffff), + s_mov_b32(s[3], one_f64 >> 32), + v_mov_b32_e32(v[2], s[2]), + v_mov_b32_e32(v[3], s[3]), + # V_DIV_SCALE_F64: src0=v0:v1, src1=v0:v1, src2=v2:v3 + # For normal inputs, should pass through src0 unchanged + VOP3SD(VOP3SDOp.V_DIV_SCALE_F64, vdst=v[4], sdst=s[10], src0=v[0], src1=v[0], src2=v[2]), + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][4] | (st.vgpr[0][5] << 32)) + # For normal (non-denorm, non-edge-case) inputs, V_DIV_SCALE_F64 passes through src0 + self.assertAlmostEqual(result, 1.4142135623730951, places=10, + msg=f"Expected ~1.414, got {result} (may be nan if 64-bit sources not read correctly)") + class TestNewPcodeHelpers(unittest.TestCase): """Tests for newly added pcode helper functions (SAD, BYTE_PERMUTE, BF16).""" @@ -3650,3 +3726,90 @@ class TestVFmaMixSinCase(unittest.TestCase): # Result should be approximately -π = -3.14... # f16 -π ≈ 0xc248 = -3.140625 self.assertAlmostEqual(lo, -3.14159, delta=0.01, msg=f"Expected ~-π, got {lo}") + + +class TestVTrigPreopF64(unittest.TestCase): + """Tests for V_TRIG_PREOP_F64 instruction. + + V_TRIG_PREOP_F64 extracts chunks of 2/PI for Payne-Hanek trig range reduction. + For input S0 (f64) and index S1 (0, 1, or 2), it returns a portion of 2/PI + scaled appropriately for computing |S0| * (2/PI) in extended precision. + + The three chunks (index 0, 1, 2) when summed should equal 2/PI. + """ + + def test_trig_preop_f64_index0(self): + """V_TRIG_PREOP_F64 index=0: primary chunk of 2/PI.""" + import math + two_over_pi = 2.0 / math.pi + instructions = [ + # S0 = 1.0 (f64), S1 = 0 (index) + s_mov_b32(s[0], 0x00000000), # low bits of 1.0 + s_mov_b32(s[1], 0x3ff00000), # high bits of 1.0 + v_trig_preop_f64(v[0], abs(s[0]), 0), # index 0 + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + # For x=1.0, index=0 should give the main part of 2/PI + self.assertAlmostEqual(result, two_over_pi, places=10, msg=f"Expected ~{two_over_pi}, got {result}") + + def test_trig_preop_f64_index1(self): + """V_TRIG_PREOP_F64 index=1: secondary chunk (extended precision bits).""" + instructions = [ + s_mov_b32(s[0], 0x00000000), # low bits of 1.0 + s_mov_b32(s[1], 0x3ff00000), # high bits of 1.0 + v_trig_preop_f64(v[0], abs(s[0]), 1), # index 1 + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + # Index 1 gives the next 53 bits, should be very small (~1e-16) + self.assertLess(abs(result), 1e-15, msg=f"Expected tiny value, got {result}") + self.assertGreater(abs(result), 0, msg="Expected non-zero value") + + def test_trig_preop_f64_index2(self): + """V_TRIG_PREOP_F64 index=2: tertiary chunk (more extended precision bits).""" + instructions = [ + s_mov_b32(s[0], 0x00000000), # low bits of 1.0 + s_mov_b32(s[1], 0x3ff00000), # high bits of 1.0 + v_trig_preop_f64(v[0], abs(s[0]), 2), # index 2 + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + # Index 2 gives the next 53 bits after index 1, should be tiny (~1e-32) + self.assertLess(abs(result), 1e-30, msg=f"Expected very tiny value, got {result}") + + def test_trig_preop_f64_sum_equals_two_over_pi(self): + """V_TRIG_PREOP_F64: sum of chunks 0,1,2 should equal 2/PI.""" + import math + two_over_pi = 2.0 / math.pi + instructions = [ + s_mov_b32(s[0], 0x00000000), # low bits of 1.0 + s_mov_b32(s[1], 0x3ff00000), # high bits of 1.0 + v_trig_preop_f64(v[0], abs(s[0]), 0), # index 0 -> v[0:1] + v_trig_preop_f64(v[2], abs(s[0]), 1), # index 1 -> v[2:3] + v_trig_preop_f64(v[4], abs(s[0]), 2), # index 2 -> v[4:5] + ] + st = run_program(instructions, n_lanes=1) + p0 = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + p1 = i642f(st.vgpr[0][2] | (st.vgpr[0][3] << 32)) + p2 = i642f(st.vgpr[0][4] | (st.vgpr[0][5] << 32)) + total = p0 + p1 + p2 + self.assertAlmostEqual(total, two_over_pi, places=14, msg=f"Expected {two_over_pi}, got {total} (p0={p0}, p1={p1}, p2={p2})") + + def test_trig_preop_f64_large_input(self): + """V_TRIG_PREOP_F64 with larger input should adjust shift based on exponent.""" + import math + # For x=2.0, exponent(2.0)=1024 which is <= 1077, so no adjustment + # But let's test with x=2^60 where exponent > 1077 + large_val = 2.0 ** 60 # exponent = 1083 > 1077 + large_bits = f2i64(large_val) + instructions = [ + s_mov_b32(s[0], large_bits & 0xffffffff), + s_mov_b32(s[1], (large_bits >> 32) & 0xffffffff), + v_trig_preop_f64(v[0], abs(s[0]), 0), + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + # Result should still be a valid float (not NaN or inf) + self.assertFalse(math.isnan(result), "Result should not be NaN") + self.assertFalse(math.isinf(result), "Result should not be inf")